From a7dc2084e5fa73960756c1a0fbdf3d3e4f335380 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 3 Apr 2024 20:03:45 +0200
Subject: [PATCH v20 4/4] Introduce query_id_const_merge_threshold

Replace query_id_const_merge with a threshold to allow merging only if
the number of elements is larger than specified value, which could be
configured using pg_stat_statements parameter query_id_const_merge_threshold.

Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
 .../pg_stat_statements/expected/merging.out   | 68 ++++++++++++++++++-
 .../pg_stat_statements/pg_stat_statements.c   | 36 +++++-----
 contrib/pg_stat_statements/sql/merging.sql    | 21 +++++-
 doc/src/sgml/pgstatstatements.sgml            | 23 ++++---
 src/backend/nodes/queryjumblefuncs.c          | 23 +++++--
 src/backend/postmaster/launch_backend.c       |  6 +-
 src/include/nodes/queryjumble.h               |  4 +-
 7 files changed, 137 insertions(+), 44 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 0cb4f67b8b7..552e248ff14 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -36,7 +36,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 (4 rows)
 
 -- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
 ---
@@ -218,4 +218,68 @@ FROM cte;
 --------
 (0 rows)
 
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                   query                                   | calls 
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) |     1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])                |     2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                        |     1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                         query                         | calls 
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t    |     1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])             |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                     |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 0ade4cf515f..ffa832bbfb0 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -265,8 +265,8 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
 
-/* An assign hook to keep query_id_const_merge in sync */
-static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+/* An assign hook to keep query_id_const_merge_threshold in sync */
+static void pgss_query_id_const_merge_assign_hook(int newvalue, void *extra);
 
 /* Links to shared memory state */
 static pgssSharedState *pgss = NULL;
@@ -295,8 +295,8 @@ static bool pgss_track_utility = true;	/* whether to track utility commands */
 static bool pgss_track_planning = false;	/* whether to track planning
 											 * duration */
 static bool pgss_save = true;	/* whether to save stats across shutdown */
-static bool pgss_query_id_const_merge = false;	/* request constants merging
-												 * when computing query_id */
+static int  pgss_query_id_const_merge_threshold = 0;	/* request constants merging
+														 * when computing query_id */
 
 #define pgss_enabled(level) \
 	(!IsParallelWorker() && \
@@ -459,20 +459,22 @@ _PG_init(void)
 							 NULL,
 							 NULL);
 
-	DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
-							 "Whether to merge constants in a list when computing query_id.",
-							 NULL,
-							 &pgss_query_id_const_merge,
-							 false,
-							 PGC_SUSET,
-							 0,
-							 NULL,
-							 pgss_query_id_const_merge_assign_hook,
-							 NULL);
+	DefineCustomIntVariable("pg_stat_statements.query_id_const_merge_threshold",
+							"Whether to merge constants in a list when computing query_id.",
+							NULL,
+							&pgss_query_id_const_merge_threshold,
+							0,
+							0,
+							INT_MAX,
+							PGC_SUSET,
+							0,
+							NULL,
+							pgss_query_id_const_merge_assign_hook,
+							NULL);
 
 	MarkGUCPrefixReserved("pg_stat_statements");
 
-	SetQueryIdConstMerge(pgss_query_id_const_merge);
+	SetQueryIdConstMerge(pgss_query_id_const_merge_threshold);
 
 	/*
 	 * Install hooks.
@@ -3082,10 +3084,10 @@ comp_location(const void *a, const void *b)
 }
 
 /*
- * Notify query jumbling about query_id_const_merge status
+ * Notify query jumbling about query_id_const_merge_threshold status
  */
 static void
-pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+pgss_query_id_const_merge_assign_hook(int newvalue, void *extra)
 {
 	SetQueryIdConstMerge(newvalue);
 }
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 657044faded..fedeb35b8f5 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -15,7 +15,7 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 -- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
 
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT * FROM test_merge WHERE id IN (1);
@@ -68,4 +68,21 @@ WITH cte AS (
 SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
 FROM cte;
 
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 12ffd021909..c939c316a37 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -605,12 +605,12 @@
    In some cases, queries with visibly different texts might get merged into a
    single <structname>pg_stat_statements</structname> entry.  Normally this
    will happen only for semantically equivalent queries, or if
-   <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
-   the only difference between queries is the length of an array with constants
-   they contain:
+   <varname>pg_stat_statements.query_id_const_merge_threshold</varname> is
+   enabled and the only difference between queries is the length of an array
+   with constants they contain:
 
 <screen>
-=# SET query_id_const_merge = on;
+=# SET query_id_const_merge_threshold = 1;
 =# SELECT pg_stat_statements_reset();
 =# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
 =# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
@@ -959,9 +959,9 @@ calls | 1
 
    <varlistentry>
     <term>
-    <varname>pg_stat_statements.query_id_const_merge</varname> (<type>bool</type>)
+    <varname>pg_stat_statements.query_id_const_merge_threshold</varname> (<type>integer</type>)
     <indexterm>
-     <primary><varname>pg_stat_statements.query_id_const_merge</varname> configuration parameter</primary>
+     <primary><varname>pg_stat_statements.query_id_const_merge_threshold</varname> configuration parameter</primary>
     </indexterm>
     </term>
 
@@ -973,11 +973,12 @@ calls | 1
       query will get multiple different identifiers, one for each occurrence
       with an array of different lenght.
 
-      If this parameter is on, an array of constants will contribute only the
-      first element, the last element and the number of elements to the query
-      identifier. It means two occurences of the same query, where the only
-      difference is number of constants in the array, are going to get the
-      same query identifier if the arrays are of similar length.
+      If this parameter is greater than 0, an array with more than
+      <varname>pg_stat_statements.query_id_const_merge_threshold</varname>
+      constants will contribute only the first element, the last element
+      and the number of elements to the query identifier. It means two
+      occurences of the same query, where the only difference is number of
+      constants in the array, are going to get the same query identifier.
       Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
 
       The parameter could be used to reduce amount of repeating data stored
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index e9ede0acb70..c25cb2be62a 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -44,8 +44,8 @@
 /* GUC parameters */
 int			compute_query_id = COMPUTE_QUERY_ID_AUTO;
 
-/* Whether to merge constants in a list when computing query_id */
-bool		query_id_const_merge = false;
+/* Lower threshold for the list length to merge constants when computing query_id */
+int			query_id_const_merge_threshold = 1;
 
 /*
  * True when compute_query_id is ON or AUTO, and a module requests them.
@@ -166,12 +166,14 @@ EnableQueryId(void)
  * Controls constants merging for query identifier computation.
  *
  * Third-party plugins can use this function to enable/disable merging
- * of constants in a list when query identifier is computed.
+ * of constants in a list when query identifier is computed. The argument
+ * specifies the lower threshold for an array length, above which merging will
+ * be applied.
  */
 void
-SetQueryIdConstMerge(bool value)
+SetQueryIdConstMerge(int threshold)
 {
-	query_id_const_merge = value;
+	query_id_const_merge_threshold = threshold;
 }
 
 /*
@@ -249,7 +251,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
 
 /*
  * Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains more than
+ * query_id_const_merge_threshold elements.
  *
  * Return value is the order of magnitude (i.e. how many digits it has) for
  * length of the list (to use for representation purposes later on) if merging
@@ -267,12 +270,18 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
 	if (elements == NIL)
 		return 0;
 
-	if (!query_id_const_merge)
+	if (query_id_const_merge_threshold < 1)
 	{
 		/* Merging is disabled, process everything one by one */
 		return 0;
 	}
 
+	if (elements->length < query_id_const_merge_threshold)
+	{
+		/* The list is not large enough */
+		return 0;
+	}
+
 	firstExpr = linitial(elements);
 
 	/*
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 8bd73929183..491858a116e 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -122,7 +122,7 @@ typedef struct
 	bool		redirection_done;
 	bool		IsBinaryUpgrade;
 	bool		query_id_enabled;
-	bool		query_id_const_merge;
+	int			query_id_const_merge_threshold;
 	int			max_safe_fds;
 	int			MaxBackends;
 #ifdef WIN32
@@ -743,7 +743,7 @@ save_backend_variables(BackendParameters *param, ClientSocket *client_sock,
 	param->redirection_done = redirection_done;
 	param->IsBinaryUpgrade = IsBinaryUpgrade;
 	param->query_id_enabled = query_id_enabled;
-	param->query_id_const_merge = query_id_const_merge;
+	param->query_id_const_merge_threshold = query_id_const_merge_threshold;
 	param->max_safe_fds = max_safe_fds;
 
 	param->MaxBackends = MaxBackends;
@@ -1002,7 +1002,7 @@ restore_backend_variables(BackendParameters *param)
 	redirection_done = param->redirection_done;
 	IsBinaryUpgrade = param->IsBinaryUpgrade;
 	query_id_enabled = param->query_id_enabled;
-	query_id_const_merge = param->query_id_const_merge;
+	query_id_const_merge_threshold = param->query_id_const_merge_threshold;
 	max_safe_fds = param->max_safe_fds;
 
 	MaxBackends = param->MaxBackends;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 0e69e420b7f..90218c60531 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -77,10 +77,10 @@ extern PGDLLIMPORT int compute_query_id;
 extern const char *CleanQuerytext(const char *query, int *location, int *len);
 extern JumbleState *JumbleQuery(Query *query);
 extern void EnableQueryId(void);
-extern void SetQueryIdConstMerge(bool value);
+extern void SetQueryIdConstMerge(int threshold);
 
 extern PGDLLIMPORT bool query_id_enabled;
-extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int 	query_id_const_merge_threshold;
 
 /*
  * Returns whether query identifier computation has been enabled, either
-- 
2.41.0

