From 68f5451019b261bf03a222f5a05ac57cd0fb9a24 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdokimov@tantorlabs.com>
Date: Thu, 21 Nov 2024 11:24:03 +0300
Subject: [PATCH] Allow setting sample ratio for pg_stat_statements

New configuration parameter pg_stat_statements.sample_ratio makes it
possible to track just a fraction of the queries meeting the configured
threshold, to reduce the amount of tracking.
---
 .../pg_stat_statements/pg_stat_statements.c   | 23 ++++++++++++++++++-
 doc/src/sgml/pgstatstatements.sgml            | 18 +++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 49c657b3e0..d06e0d8a44 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -49,6 +49,7 @@
 
 #include "access/parallel.h"
 #include "catalog/pg_authid.h"
+#include "common/pg_prng.h"
 #include "common/int.h"
 #include "executor/instrument.h"
 #include "funcapi.h"
@@ -294,12 +295,16 @@ static bool pgss_track_utility = true;	/* whether to track utility commands */
 static bool pgss_track_planning = false;	/* whether to track planning
 											 * duration */
 static bool pgss_save = true;	/* whether to save stats across shutdown */
+static double pgss_sample_rate = 1;
 
+/* Is the current top-level query to be sampled? */
+static bool current_query_sampled = false;
 
 #define pgss_enabled(level) \
 	(!IsParallelWorker() && \
 	(pgss_track == PGSS_TRACK_ALL || \
-	(pgss_track == PGSS_TRACK_TOP && (level) == 0)))
+	(pgss_track == PGSS_TRACK_TOP && (level) == 0)) && \
+	current_query_sampled)
 
 #define record_gc_qtexts() \
 	do { \
@@ -414,6 +419,19 @@ _PG_init(void)
 							NULL,
 							NULL);
 
+	DefineCustomRealVariable("pg_stat_statements.sample_rate",
+							 "Fraction of queries to process.",
+							 NULL,
+							 &pgss_sample_rate,
+							 1.0,
+							 0.0,
+							 1.0,
+							 PGC_SUSET,
+							 0,
+							 NULL,
+							 NULL,
+							 NULL);
+
 	DefineCustomEnumVariable("pg_stat_statements.track",
 							 "Selects which statements are tracked by pg_stat_statements.",
 							 NULL,
@@ -835,6 +853,9 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
 	if (prev_post_parse_analyze_hook)
 		prev_post_parse_analyze_hook(pstate, query, jstate);
 
+	if (nesting_level == 0)
+		current_query_sampled = (pg_prng_double(&pg_global_prng_state) < pgss_sample_rate);
+
 	/* Safety check... */
 	if (!pgss || !pgss_hash || !pgss_enabled(nesting_level))
 		return;
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 501b468e9a..d06349d097 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -936,6 +936,24 @@
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term>
+     <varname>pg_stat_statements.sample_rate</varname> (<type>real</type>)
+     <indexterm>
+      <primary><varname>pg_stat_statements.sample_rate</varname> configuration parameter</primary>
+     </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      <varname>pg_stat_statements.sample_rate</varname> causes pg_stat_statements to only
+      track a fraction of the statements in each session.  The default is 1,
+      meaning track all the queries. In case of nested statements,
+      either all will be tracked or none. Only superusers can change this setting.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term>
      <varname>pg_stat_statements.save</varname> (<type>boolean</type>)
-- 
2.34.1

