From b4d325d11d1ec6912a4bb07c4f3ff2ce079212b8 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Thu, 8 May 2025 16:41:20 +0200 Subject: [PATCH v1 2/2] Use LocationExpr in squashing For the purpose of constants squashing we have only start location of an expression, which is not enouch if the constant is wrapped e.g. in a cast function. Apply information conveyed via LocationExpr to improve squashing of constants. Based on an idea from Sami Imseih. --- .../pg_stat_statements/expected/squashing.out | 42 +++++++--- .../pg_stat_statements/pg_stat_statements.c | 35 +++------ contrib/pg_stat_statements/sql/squashing.sql | 8 +- src/backend/nodes/queryjumblefuncs.c | 76 ++++++++++++------- 4 files changed, 97 insertions(+), 64 deletions(-) diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out index 7b138af098c..df9ff9d5637 100644 --- a/contrib/pg_stat_statements/expected/squashing.out +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -147,6 +147,24 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (3 rows) +-- Parsing +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT WHERE 1 IN (1, 2, int4(1)); +-- +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT WHERE $1 IN ($2 /*, ... */) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + -- FuncExpr -- Verify multiple type representation end up with the same query_id CREATE TABLE test_float (data float); @@ -246,7 +264,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_bigint WHERE data IN +| 1 - ($1 /*, ... */::bigint) | + ($1 /*, ... */) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -353,7 +371,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_cast WHERE data IN +| 1 - ($1 /*, ... */::int4::casttesttype) | + ($1 /*, ... */) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -376,7 +394,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_jsonb WHERE data IN +| 1 - (($1 /*, ... */)::jsonb) | + ($1 /*, ... */) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -393,10 +411,10 @@ SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6 (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls -------------------------------------------------------------+------- - SELECT * FROM test_squash WHERE id IN ($1 /*, ... */::oid) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +-------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) -- Test constants evaluation in a CTE, which was causing issues in the past @@ -409,7 +427,7 @@ FROM cte; -------- (0 rows) --- Simple array would be squashed as well +-- Simple array is not squashed yet SELECT pg_stat_statements_reset() IS NOT NULL AS t; t --- @@ -423,9 +441,9 @@ SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; (1 row) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls -----------------------------------------------------+------- - SELECT ARRAY[$1 /*, ... */] | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +-------------------------------------------------------+------- + SELECT ARRAY[$1, $2, $3, $4, $5, $6, $7, $8, $9, $10] | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 9778407cba3..314e065b364 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2825,10 +2825,6 @@ generate_normalized_query(JumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ - bool in_squashed = false; /* in a run of squashed consts? */ - int skipped_constants = 0; /* Position adjustment of later - * constants after squashed ones */ - /* * Get constants' lengths (core system only gives us locations). Note @@ -2886,12 +2882,9 @@ generate_normalized_query(JumbleState *jstate, const char *query, /* ... and then a param symbol replacing the constant itself */ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id - skipped_constants); - - /* In case previous constants were merged away, stop doing that */ - in_squashed = false; + i + 1 + jstate->highest_extern_param_id); } - else if (!in_squashed) + else { /* * This location is the start position of a run of constants to be @@ -2903,27 +2896,12 @@ generate_normalized_query(JumbleState *jstate, const char *query, len_to_wrt = off - last_off; len_to_wrt -= last_tok_len; Assert(len_to_wrt >= 0); - Assert(i + 1 < jstate->clocations_count); - Assert(jstate->clocations[i + 1].squashed); memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); n_quer_loc += len_to_wrt; /* ... and then start a run of squashed constants */ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */", - i + 1 + jstate->highest_extern_param_id - skipped_constants); - - /* The next location will match the block below, to end the run */ - in_squashed = true; - - skipped_constants++; - } - else - { - /* - * The second location of a run of squashable elements; this - * indicates its end. - */ - in_squashed = false; + i + 1 + jstate->highest_extern_param_id); } /* Otherwise the constant is squashed away -- move forward */ @@ -3012,6 +2990,13 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query, int loc = locs[i].location; int tok; + /* Squashed constants are recorded with a length set already */ + if (locs[i].squashed) + { + Assert(locs[i].length != -1); + continue; + } + /* Adjust recorded location if we're dealing with partial string */ loc -= query_loc; diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql index 908be81ff2b..d30541a275c 100644 --- a/contrib/pg_stat_statements/sql/squashing.sql +++ b/contrib/pg_stat_statements/sql/squashing.sql @@ -49,6 +49,12 @@ SELECT * FROM test_squash WHERE id IN (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9'); SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; +-- Parsing + +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT WHERE 1 IN (1, 2, int4(1)); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + -- FuncExpr -- Verify multiple type representation end up with the same query_id @@ -163,7 +169,7 @@ WITH cte AS ( SELECT ARRAY['a', 'b', 'c', const::varchar] AS result FROM cte; --- Simple array would be squashed as well +-- Simple array is not squashed yet SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index d1e82a63f09..ae26406bbbc 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -62,8 +62,10 @@ static void AppendJumble(JumbleState *jstate, static void FlushPendingNulls(JumbleState *jstate); static void RecordConstLocation(JumbleState *jstate, int location, bool squashed); +static void RecordConstLocationRange(JumbleState *jstate, + int start, int end, bool squashed); static void _jumbleNode(JumbleState *jstate, Node *node); -static void _jumbleElements(JumbleState *jstate, List *elements); +static void _jumbleElements(JumbleState *jstate, List *elements, Node *expr); static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); @@ -403,6 +405,32 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed) } } +/* + * Similar to RecordConstLocation, RecordConstLocationRange stores a constant + * with location start and end boundaries. + */ +static void +RecordConstLocationRange(JumbleState *jstate, int start, int end, bool squashed) +{ + /* -1 indicates unknown or undefined location */ + if (start >= 0 && end >= 0) + { + /* enlarge array if needed */ + if (jstate->clocations_count >= jstate->clocations_buf_size) + { + jstate->clocations_buf_size *= 2; + jstate->clocations = (LocationLen *) + repalloc(jstate->clocations, + jstate->clocations_buf_size * + sizeof(LocationLen)); + } + jstate->clocations[jstate->clocations_count].location = start; + jstate->clocations[jstate->clocations_count].squashed = squashed; + jstate->clocations[jstate->clocations_count].length = end - start + 1; + jstate->clocations_count++; + } +} + /* * Subroutine for _jumbleElements: Verify a few simple cases where we can * deduce that the expression is a constant: @@ -461,7 +489,7 @@ IsSquashableConst(Node *element) * expressions. */ static bool -IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) +IsSquashableConstList(List *elements) { ListCell *temp; @@ -473,13 +501,8 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) return false; foreach(temp, elements) - { if (!IsSquashableConst(lfirst(temp))) return false; - } - - *firstExpr = linitial(elements); - *lastExpr = llast(elements); return true; } @@ -487,7 +510,7 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) #define JUMBLE_NODE(item) \ _jumbleNode(jstate, (Node *) expr->item) #define JUMBLE_ELEMENTS(list) \ - _jumbleElements(jstate, (List *) expr->list) + _jumbleElements(jstate, (List *) expr->list, (Node *) expr) #define JUMBLE_LOCATION(location) \ RecordConstLocation(jstate, expr->location, false) #define JUMBLE_FIELD(item) \ @@ -523,28 +546,29 @@ do { \ * elements in the list. */ static void -_jumbleElements(JumbleState *jstate, List *elements) +_jumbleElements(JumbleState *jstate, List *elements, Node *expr) { - Node *first, - *last; - - if (IsSquashableConstList(elements, &first, &last)) + if (IsSquashableConstList(elements)) { + ArrayExpr *array; + /* - * If this list of elements is squashable, keep track of the location - * of its first and last elements. When reading back the locations - * array, we'll see two consecutive locations with ->squashed set to - * true, indicating the location of initial and final elements of this - * list. - * - * For the limited set of cases we support now (implicit coerce via - * FuncExpr, Const) it's fine to use exprLocation of the 'last' - * expression, but if more complex composite expressions are to be - * supported (e.g., OpExpr or FuncExpr as an explicit call), more - * sophisticated tracking will be needed. + * Currenlty only ArrayExpr provides location information, needed for + * squashing. */ - RecordConstLocation(jstate, exprLocation(first), true); - RecordConstLocation(jstate, exprLocation(last), true); + Assert(IsA(expr, ArrayExpr)); + array = (ArrayExpr *) expr; + + /* + * If the parent ArrayExpr has location information, i.e. start and the + * end of the expression, use it as boundaries for squashing. + */ + if (array->loc_range != NIL) + RecordConstLocationRange(jstate, + linitial_int(array->loc_range), + lsecond_int(array->loc_range), true); + else + _jumbleNode(jstate, (Node *) elements); } else { -- 2.45.1