From 598e482bec071953d639cccd032556af8b79680d Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Fri, 3 Apr 2026 20:58:27 +0200
Subject: [PATCH v9] Add log_statement_max_length GUC to limit logged statement
 size

When log_statement is enabled, queries can be arbitrarily long and may
consume significant disk space in server logs. This patch introduces a
new GUC, log_statement_max_length, which limits the maximum byte length
of logged statements.

A value greater than zero truncates each logged statement to at most
the given number of bytes, cutting at a multibyte character boundary.
A value of zero logs statements with an empty body. The default is -1,
which disables truncation and logs full statements. If specified
without units, the value is interpreted as bytes.
---
 doc/src/sgml/config.sgml                      | 19 ++++++++
 src/backend/tcop/postgres.c                   | 38 +++++++++++++---
 src/backend/utils/error/elog.c                | 35 +++++++++++++++
 src/backend/utils/misc/guc_parameters.dat     | 10 +++++
 src/backend/utils/misc/guc_tables.c           |  1 +
 src/backend/utils/misc/postgresql.conf.sample |  2 +
 src/bin/pg_ctl/t/004_logrotate.pl             | 44 +++++++++++++++++++
 src/include/utils/elog.h                      |  1 +
 src/include/utils/guc.h                       |  1 +
 9 files changed, 145 insertions(+), 6 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index fdb77df0fdb..e2f703d5eef 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8283,6 +8283,25 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-log-statement-max-length" xreflabel="log_statement_max_length">
+      <term><varname>log_statement_max_length</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>log_statement_max_length</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If greater than zero, each statement written to the server log
+        is truncated to at most this many bytes.
+        A value of zero causes statements to be logged with an empty body.
+        <literal>-1</literal> (the default) disables truncation.
+        If this value is specified without units, it is taken as bytes.
+        Only superusers and users with the appropriate <literal>SET</literal>
+        privilege can change this setting.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-log-replication-commands" xreflabel="log_replication_commands">
       <term><varname>log_replication_commands</varname> (<type>boolean</type>)
       <indexterm>
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 10be60011ad..e5027d07ac7 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -1024,11 +1024,15 @@ exec_simple_query(const char *query_string)
 	bool		was_logged = false;
 	bool		use_implicit_block;
 	char		msec_str[32];
+	char	   *truncated_query = NULL;
+	const char *query_log;
 
 	/*
 	 * Report query to various monitoring facilities.
 	 */
 	debug_query_string = query_string;
+	truncated_query = truncate_query_log(query_string);
+	query_log = truncated_query ? truncated_query : query_string;
 
 	pgstat_report_activity(STATE_RUNNING, query_string);
 
@@ -1073,7 +1077,7 @@ exec_simple_query(const char *query_string)
 	if (check_log_statement(parsetree_list))
 	{
 		ereport(LOG,
-				(errmsg("statement: %s", query_string),
+				(errmsg("statement: %s", query_log),
 				 errhidestmt(true),
 				 errdetail_execute(parsetree_list)));
 		was_logged = true;
@@ -1371,7 +1375,7 @@ exec_simple_query(const char *query_string)
 		case 2:
 			ereport(LOG,
 					(errmsg("duration: %s ms  statement: %s",
-							msec_str, query_string),
+							msec_str, query_log),
 					 errhidestmt(true),
 					 errdetail_execute(parsetree_list)));
 			break;
@@ -1382,6 +1386,8 @@ exec_simple_query(const char *query_string)
 
 	TRACE_POSTGRESQL_QUERY_DONE(query_string);
 
+	if (truncated_query)
+		pfree(truncated_query);
 	debug_query_string = NULL;
 }
 
@@ -1405,6 +1411,8 @@ exec_parse_message(const char *query_string,	/* string to execute */
 	bool		is_named;
 	bool		save_log_statement_stats = log_statement_stats;
 	char		msec_str[32];
+	char	   *truncated_query = NULL;
+	const char *query_log;
 
 	/*
 	 * Report query to various monitoring facilities.
@@ -1604,12 +1612,16 @@ exec_parse_message(const char *query_string,	/* string to execute */
 					 errhidestmt(true)));
 			break;
 		case 2:
+			truncated_query = truncate_query_log(query_string);
+			query_log = truncated_query ? truncated_query : query_string;
 			ereport(LOG,
 					(errmsg("duration: %s ms  parse %s: %s",
 							msec_str,
 							*stmt_name ? stmt_name : "<unnamed>",
-							query_string),
+							query_log),
 					 errhidestmt(true)));
+			if (truncated_query)
+				pfree(truncated_query);
 			break;
 	}
 
@@ -1647,6 +1659,8 @@ exec_bind_message(StringInfo input_message)
 	ParamsErrorCbData params_data;
 	ErrorContextCallback params_errcxt;
 	ListCell   *lc;
+	char	   *truncated_query = NULL;
+	const char *query_log;
 
 	/* Get the fixed part of the message */
 	portal_name = pq_getmsgstring(input_message);
@@ -2081,15 +2095,19 @@ exec_bind_message(StringInfo input_message)
 					 errhidestmt(true)));
 			break;
 		case 2:
+			truncated_query = truncate_query_log(psrc->query_string);
+			query_log = truncated_query ? truncated_query : psrc->query_string;
 			ereport(LOG,
 					(errmsg("duration: %s ms  bind %s%s%s: %s",
 							msec_str,
 							*stmt_name ? stmt_name : "<unnamed>",
 							*portal_name ? "/" : "",
 							*portal_name ? portal_name : "",
-							psrc->query_string),
+							query_log),
 					 errhidestmt(true),
 					 errdetail_params(params)));
+			if (truncated_query)
+				pfree(truncated_query);
 			break;
 	}
 
@@ -2127,6 +2145,8 @@ exec_execute_message(const char *portal_name, long max_rows)
 	const char *cmdtagname;
 	size_t		cmdtaglen;
 	ListCell   *lc;
+	char	   *truncated_source = NULL;
+	const char *source_log;
 
 	/* Adjust destination to tell printtup.c what to do */
 	dest = whereToSendOutput;
@@ -2166,6 +2186,9 @@ exec_execute_message(const char *portal_name, long max_rows)
 		prepStmtName = "<unnamed>";
 	portalParams = portal->portalParams;
 
+	truncated_source = truncate_query_log(sourceText);
+	source_log = truncated_source ? truncated_source : sourceText;
+
 	/*
 	 * Report query to various monitoring facilities.
 	 */
@@ -2237,7 +2260,7 @@ exec_execute_message(const char *portal_name, long max_rows)
 						prepStmtName,
 						*portal_name ? "/" : "",
 						*portal_name ? portal_name : "",
-						sourceText),
+						source_log),
 				 errhidestmt(true),
 				 errdetail_params(portalParams)));
 		was_logged = true;
@@ -2361,12 +2384,15 @@ exec_execute_message(const char *portal_name, long max_rows)
 							prepStmtName,
 							*portal_name ? "/" : "",
 							*portal_name ? portal_name : "",
-							sourceText),
+							source_log),
 					 errhidestmt(true),
 					 errdetail_params(portalParams)));
 			break;
 	}
 
+	if (truncated_source)
+		pfree(truncated_source);
+
 	if (save_log_statement_stats)
 		ShowUsage("EXECUTE MESSAGE STATISTICS");
 
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index c270c62e213..b9f950b5d8e 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -4235,6 +4235,41 @@ write_stderr(const char *fmt,...)
 }
 
 
+/*
+ * truncate_query_log - clip a statement to log_statement_max_length bytes
+ *
+ * Returns a palloc'd copy of "query" cut to at most log_statement_max_length
+ * bytes, ending on a multibyte character boundary.  Returns NULL if "query"
+ * is NULL, the limit is negative (truncation disabled), or the query already
+ * fits; the caller then logs the original string unchanged.
+ */
+char *
+truncate_query_log(const char *query)
+{
+	size_t		limit;
+	size_t		query_len;
+	int			truncated_len;
+
+	/* Truncation is disabled when the limit is negative */
+	if (query == NULL || log_statement_max_length < 0)
+		return NULL;
+
+	limit = (size_t) log_statement_max_length;
+
+	/*
+	 * Only the first limit + 1 bytes decide whether the query fits, so avoid
+	 * scanning the whole of a potentially huge statement.
+	 */
+	query_len = strnlen(query, limit + 1);
+	if (query_len <= limit)
+		return NULL;			/* fits (possibly exactly); nothing to do */
+
+	/* Clip at a multibyte character boundary; may yield fewer bytes */
+	truncated_len = pg_mbcliplen(query, (int) query_len,
+								 log_statement_max_length);
+	return pnstrdup(query, truncated_len);
+}
+
 /*
  * Write errors to stderr (or by equal means when stderr is
  * not available) - va_list version
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index e556b8844d8..78e93e2b0fd 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -1833,6 +1833,16 @@
   options => 'log_statement_options',
 },
 
+{ name => 'log_statement_max_length', type => 'int', context => 'PGC_SUSET', group => 'LOGGING_WHAT',
+  short_desc => 'Sets the maximum length in bytes of logged statements.',
+  long_desc => '-1 means no truncation.',
+  flags => 'GUC_UNIT_BYTE',
+  variable => 'log_statement_max_length',
+  boot_val => '-1',
+  min => '-1',
+  max => 'INT_MAX / 2',
+},
+
 { name => 'log_statement_sample_rate', type => 'real', context => 'PGC_SUSET', group => 'LOGGING_WHEN',
   short_desc => 'Fraction of statements exceeding "log_min_duration_sample" to be logged.',
   long_desc => 'Use a value between 0.0 (never log) and 1.0 (always log).',
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 1e14b7b4af0..ecded52bc42 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -553,6 +553,7 @@ int			log_min_duration_statement = -1;
 int			log_parameter_max_length = -1;
 int			log_parameter_max_length_on_error = 0;
 int			log_temp_files = -1;
+int			log_statement_max_length = -1;
 double		log_statement_sample_rate = 1.0;
 double		log_xact_sample_rate = 0;
 char	   *backtrace_functions;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 2c5e98d1d4d..fca1882a0dc 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -664,6 +664,8 @@
                                         # bind-parameter values to N bytes;
                                         # -1 means print in full, 0 disables
 #log_statement = 'none'                 # none, ddl, mod, all
+#log_statement_max_length = -1          # max length of logged statements
+                                        # -1 disables truncation
 #log_replication_commands = off
 #log_temp_files = -1                    # log temporary files equal or larger
                                         # than the specified size in kilobytes;
diff --git a/src/bin/pg_ctl/t/004_logrotate.pl b/src/bin/pg_ctl/t/004_logrotate.pl
index 7b19f864673..8e8c325b8f5 100644
--- a/src/bin/pg_ctl/t/004_logrotate.pl
+++ b/src/bin/pg_ctl/t/004_logrotate.pl
@@ -135,6 +135,50 @@ check_log_pattern('stderr', $new_current_logfiles, 'syntax error', $node);
 check_log_pattern('csvlog', $new_current_logfiles, 'syntax error', $node);
 check_log_pattern('jsonlog', $new_current_logfiles, 'syntax error', $node);
 
+# Verify truncation works with ASCII.  The query is 24 bytes; with
+# log_statement_max_length = 20 it must be cut after the 20th byte ('C')
+# and must NOT contain the 21st character ('D').
+$node->append_conf('postgresql.conf', "log_statement = 'all'\nlog_statement_max_length = 20\n");
+$node->reload();
+$node->psql('postgres', "SELECT '123456789ABCDEF'");
+check_log_pattern('stderr', $new_current_logfiles, "SELECT '123456789ABC(?!D)", $node);
+
+# Verify -1 disables truncation (logs full query including closing quote)
+$node->append_conf('postgresql.conf', "log_statement_max_length = -1\n");
+$node->reload();
+$node->psql('postgres', "SELECT '123456789ABCDEF'");
+check_log_pattern('stderr', $new_current_logfiles, "SELECT '123456789ABCDEF'", $node);
+
+# Verify a 4-byte character is kept whole: 12 = 8 bytes of "SELECT '" + 4
+$node->append_conf('postgresql.conf', "log_statement_max_length = 12\n");
+$node->reload();
+$node->psql('postgres', "SELECT '\xF0\x9F\x90\x98test'");
+check_log_pattern('stderr', $new_current_logfiles, "SELECT '\xF0\x9F\x90\x98(?!t)", $node);
+
+# Verify truncation via the extended query protocol.  Same 24-byte query
+# truncated to 20 bytes; the 21st character ('D') must not appear.
+$node->append_conf('postgresql.conf', "log_statement_max_length = 20\n");
+$node->reload();
+$node->psql('postgres', "SELECT '123456789ABCDEF' \\bind \\g");
+check_log_pattern('stderr', $new_current_logfiles, "execute <unnamed>: SELECT '123456789ABC(?!D)", $node);
+
+# Verify extended protocol also respects -1 (no truncation; closing quote present)
+$node->append_conf('postgresql.conf', "log_statement_max_length = -1\n");
+$node->reload();
+$node->psql('postgres', "SELECT '123456789ABCDEF' \\bind \\g");
+check_log_pattern('stderr', $new_current_logfiles, "execute <unnamed>: SELECT '123456789ABCDEF'", $node);
+
+# Verify truncation of the parse/bind/execute duration entries emitted by
+# log_min_duration_statement (log_statement = 'none' so they carry the text).
+# Anchor on "duration:" so the earlier plain "execute" entry cannot match.
+$node->append_conf('postgresql.conf',
+	"log_statement = 'none'\nlog_min_duration_statement = 0\nlog_statement_max_length = 20\n");
+$node->reload();
+$node->psql('postgres', "SELECT '123456789ABCDEF' \\bind \\g");
+check_log_pattern('stderr', $new_current_logfiles, "duration: [0-9.]+ ms\\s+parse <unnamed>: SELECT '123456789ABC(?!D)", $node);
+check_log_pattern('stderr', $new_current_logfiles, "duration: [0-9.]+ ms\\s+bind <unnamed>: SELECT '123456789ABC(?!D)", $node);
+check_log_pattern('stderr', $new_current_logfiles, "duration: [0-9.]+ ms\\s+execute <unnamed>: SELECT '123456789ABC(?!D)", $node);
+
 $node->stop();
 
 done_testing();
diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h
index 440a02dd147..92753b08b67 100644
--- a/src/include/utils/elog.h
+++ b/src/include/utils/elog.h
@@ -509,6 +509,7 @@ extern void log_status_format(StringInfo buf, const char *format,
 extern void DebugFileOpen(void);
 extern char *unpack_sql_state(int sql_state);
 extern bool in_error_recursion_trouble(void);
+extern char *truncate_query_log(const char *query);
 
 /* Common functions shared across destinations */
 extern void reset_formatted_start_time(void);
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index dc406d6651a..8057d7870ad 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -300,6 +300,7 @@ extern PGDLLIMPORT int client_min_messages;
 extern PGDLLIMPORT int log_min_duration_sample;
 extern PGDLLIMPORT int log_min_duration_statement;
 extern PGDLLIMPORT int log_temp_files;
+extern PGDLLIMPORT int log_statement_max_length;
 extern PGDLLIMPORT double log_statement_sample_rate;
 extern PGDLLIMPORT double log_xact_sample_rate;
 extern PGDLLIMPORT char *backtrace_functions;
-- 
2.43.0

