From 700f7895a189fca570d1b0088c844c776636c811 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Fri, 20 Jan 2023 13:27:29 +0900
Subject: [PATCH v5 4/4] Add GUC utility_query_id

This GUC has two modes to control the computation method of query IDs
for utilities:
- 'string', the default, to hash the query string.
- 'jumble', to use the parsed tree.
---
 src/include/nodes/queryjumble.h               |  7 ++
 src/backend/nodes/queryjumblefuncs.c          | 81 ++++++++++++++-----
 src/backend/utils/misc/guc_tables.c           | 16 ++++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 doc/src/sgml/config.sgml                      | 18 +++++
 .../expected/pg_stat_statements.out           | 31 +++++++
 .../sql/pg_stat_statements.sql                | 17 ++++
 7 files changed, 151 insertions(+), 20 deletions(-)

diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 204b8f74fd..261aea6bcf 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -59,8 +59,15 @@ enum ComputeQueryIdType
 	COMPUTE_QUERY_ID_REGRESS
 };
 
+enum UtilityQueryIdType
+{
+	UTILITY_QUERY_ID_STRING,	/* hash the query string */
+	UTILITY_QUERY_ID_JUMBLE		/* jumble the parsed tree */
+};
+
 /* GUC parameters */
 extern PGDLLIMPORT int compute_query_id;
+extern PGDLLIMPORT int utility_query_id;
 
 
 extern const char *CleanQuerytext(const char *query, int *location, int *len);
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 278150fba0..dd9ab8f353 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -41,12 +41,15 @@
 
 /* GUC parameters */
 int			compute_query_id = COMPUTE_QUERY_ID_AUTO;
+int			utility_query_id = UTILITY_QUERY_ID_STRING;
 
 /* True when compute_query_id is ON, or AUTO and a module requests them */
 bool		query_id_enabled = false;
 
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *item, Size size);
+static uint64 compute_utility_query_id(const char *query_text,
+									   int query_location, int query_len);
 static void RecordConstLocation(JumbleState *jstate, int location);
 static void _jumbleNode(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
@@ -102,29 +105,39 @@ JumbleQuery(Query *query, const char *querytext)
 
 	Assert(IsQueryIdEnabled());
 
-	jstate = (JumbleState *) palloc(sizeof(JumbleState));
+	/* Utilities use a hash of the query string unless 'jumble' is selected */
+	if (query->utilityStmt && utility_query_id == UTILITY_QUERY_ID_STRING)
+	{
+		query->queryId = compute_utility_query_id(querytext,
+												  query->stmt_location,
+												  query->stmt_len);
+	}
+	else
+	{
+		jstate = (JumbleState *) palloc(sizeof(JumbleState));
 
-	/* Set up workspace for query jumbling */
-	jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
-	jstate->jumble_len = 0;
-	jstate->clocations_buf_size = 32;
-	jstate->clocations = (LocationLen *)
-		palloc(jstate->clocations_buf_size * sizeof(LocationLen));
-	jstate->clocations_count = 0;
-	jstate->highest_extern_param_id = 0;
+		/* Set up workspace for query jumbling */
+		jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
+		jstate->jumble_len = 0;
+		jstate->clocations_buf_size = 32;
+		jstate->clocations = (LocationLen *)
+			palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+		jstate->clocations_count = 0;
+		jstate->highest_extern_param_id = 0;
 
-	/* Compute query ID and mark the Query node with it */
-	_jumbleNode(jstate, (Node *) query);
-	query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
-													  jstate->jumble_len,
-													  0));
+		/* Compute query ID and mark the Query node with it */
+		_jumbleNode(jstate, (Node *) query);
+		query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
+														  jstate->jumble_len,
+														  0));
 
-	/*
-	 * If we are unlucky enough to get a hash of zero, use 1 instead, to
-	 * prevent confusion with the utility-statement case.
-	 */
-	if (query->queryId == UINT64CONST(0))
-		query->queryId = UINT64CONST(1);
+		/*
+		 * If we are unlucky enough to get a hash of zero, use 1 instead, to
+		 * prevent confusion with the utility-statement case.
+		 */
+		if (query->queryId == UINT64CONST(0))
+			query->queryId = UINT64CONST(1);
+	}
 
 	return jstate;
 }
@@ -142,6 +155,34 @@ EnableQueryId(void)
 		query_id_enabled = true;
 }
 
+/*
+ * Compute a utility statement's query ID by hashing its query string; never 0.
+ */
+static uint64
+compute_utility_query_id(const char *query_text, int query_location, int query_len)
+{
+	uint64		queryId;
+	const char *sql;
+
+	/*
+	 * Confine our attention to this statement's part of a multi-statement
+	 * source string; query_location and query_len are passed by reference.
+	 */
+	sql = CleanQuerytext(query_text, &query_location, &query_len);
+
+	queryId = DatumGetUInt64(hash_any_extended((const unsigned char *) sql,
+											   query_len, 0));
+
+	/*
+	 * A hash of zero would look like an invalid query ID, so use 2 instead;
+	 * JumbleQuery() already substitutes 1 in the same situation.
+	 */
+	if (queryId == UINT64CONST(0))
+		queryId = UINT64CONST(2);
+
+	return queryId;
+}
+
 /*
  * AppendJumble: Append a value that is substantive in a given query to
  * the current jumble.
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 4ac808ed22..97619c4e1d 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -294,6 +294,12 @@ static const struct config_enum_entry compute_query_id_options[] = {
 	{NULL, 0, false}
 };
 
+static const struct config_enum_entry utility_query_id_options[] = {
+	{"string", UTILITY_QUERY_ID_STRING, false},	/* hash the query string */
+	{"jumble", UTILITY_QUERY_ID_JUMBLE, false},	/* use the parsed tree */
+	{NULL, 0, false}
+};
+
 /*
  * Although only "on", "off", and "partition" are documented, we
  * accept all the likely variants of "on" and "off".
@@ -4574,6 +4580,16 @@ struct config_enum ConfigureNamesEnum[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"utility_query_id", PGC_SUSET, STATS_MONITORING,
+			gettext_noop("Controls method computing query ID for utilities."),
+			NULL
+		},
+		&utility_query_id,
+		UTILITY_QUERY_ID_STRING, utility_query_id_options,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"constraint_exclusion", PGC_USERSET, QUERY_TUNING_OTHER,
 			gettext_noop("Enables the planner to use constraints to optimize queries."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d06074b86f..bbf95af59d 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -623,6 +623,7 @@
 # - Monitoring -
 
 #compute_query_id = auto
+#utility_query_id = string		# string, jumble
 #log_statement_stats = off
 #log_parser_stats = off
 #log_planner_stats = off
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index dc9b78b0b7..e1e7a134cf 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8237,6 +8237,24 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-utility-query-id" xreflabel="utility_query_id">
+      <term><varname>utility_query_id</varname> (<type>enum</type>)
+      <indexterm>
+       <primary><varname>utility_query_id</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Controls the method used to compute the query identifier of a utility
+        query. Valid values are <literal>string</literal> to use a hash of the
+        query string and <literal>jumble</literal> to compute the query
+        identifier from the parse tree of the utility query (less
+        performant, but allows for more parameterization of the queries
+        involved). The default is <literal>string</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-log-statement-stats">
       <term><varname>log_statement_stats</varname> (<type>boolean</type>)
       <indexterm>
diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index 9ac5c87c3a..8bdf8beec3 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -554,6 +554,7 @@ DROP TABLE pgss_a, pgss_b CASCADE;
 -- utility commands
 --
 SET pg_stat_statements.track_utility = TRUE;
+SET utility_query_id = 'string';
 SELECT pg_stat_statements_reset();
  pg_stat_statements_reset 
 --------------------------
@@ -592,6 +593,36 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
  SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" |     0 |    0
 (9 rows)
 
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SET utility_query_id = 'jumble';
+-- These queries have a different string, but the same parsing
+-- representation.
+Begin;
+Create Table test_utility_query (a int);
+Drop Table test_utility_query;
+Commit;
+BEGIN;
+CREATE TABLE test_utility_query (a int);
+DROP TABLE test_utility_query;
+COMMIT;
+SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                    query                                     | calls | rows 
+------------------------------------------------------------------------------+-------+------
+ Begin                                                                        |     2 |    0
+ Commit                                                                       |     2 |    0
+ Create Table test_utility_query (a int)                                      |     2 |    0
+ Drop Table test_utility_query                                                |     2 |    0
+ SELECT pg_stat_statements_reset()                                            |     1 |    1
+ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" |     0 |    0
+ SET utility_query_id = 'jumble'                                              |     1 |    0
+(7 rows)
+
+RESET utility_query_id;
 --
 -- Track the total number of rows retrieved or affected by the utility
 -- commands of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED VIEW,
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index 8f5c866225..81d663f81c 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -258,6 +258,7 @@ DROP TABLE pgss_a, pgss_b CASCADE;
 -- utility commands
 --
 SET pg_stat_statements.track_utility = TRUE;
+SET utility_query_id = 'string';
 SELECT pg_stat_statements_reset();
 
 SELECT 1;
@@ -272,6 +273,22 @@ DROP FUNCTION PLUS_TWO(INTEGER);
 
 SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
 
+SELECT pg_stat_statements_reset();
+SET utility_query_id = 'jumble';
+-- These queries have a different string, but the same parsing
+-- representation.
+Begin;
+Create Table test_utility_query (a int);
+Drop Table test_utility_query;
+Commit;
+BEGIN;
+CREATE TABLE test_utility_query (a int);
+DROP TABLE test_utility_query;
+COMMIT;
+
+SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C";
+RESET utility_query_id;
+
 --
 -- Track the total number of rows retrieved or affected by the utility
 -- commands of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED VIEW,
-- 
2.39.0

