From 19a4b41a8810bcb64c5a522473e8b0c69573b7d2 Mon Sep 17 00:00:00 2001
From: Jim Jones <jim.jones@uni-muenster.de>
Date: Wed, 8 Apr 2026 22:22:49 +0200
Subject: [PATCH v10] Add log_statement_max_length GUC to limit logged
 statement size

Queries logged by log_statement or log_min_duration_statement can be
arbitrarily long and may consume significant disk space in server logs.
This patch introduces a new GUC, log_statement_max_length, which limits
the maximum byte length of those logged statements.

A value greater than zero truncates each logged statement to the given
number of bytes, respecting multibyte character boundaries to avoid
producing invalid output. The default is -1, which logs statements in
full. If specified without units, the value is interpreted as bytes.
---
 doc/src/sgml/config.sgml                      |  21 +++
 src/backend/tcop/postgres.c                   | 134 +++++++++++++-----
 src/backend/utils/error/elog.c                |  35 +++++
 src/backend/utils/misc/guc_parameters.dat     |  10 ++
 src/backend/utils/misc/guc_tables.c           |   1 +
 src/backend/utils/misc/postgresql.conf.sample |   2 +
 src/include/utils/elog.h                      |   1 +
 src/include/utils/guc.h                       |   1 +
 src/test/modules/test_misc/meson.build        |   1 +
 .../t/012_log_statement_max_length.pl         |  96 +++++++++++++
 10 files changed, 264 insertions(+), 38 deletions(-)
 create mode 100644 src/test/modules/test_misc/t/012_log_statement_max_length.pl

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 1c8b8e7f3e2..28afa0201c0 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8473,6 +8473,27 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-log-statement-max-length" xreflabel="log_statement_max_length">
+      <term><varname>log_statement_max_length</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>log_statement_max_length</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If greater than zero, each statement logged by
+        <xref linkend="guc-log-statement"/> or
+        <xref linkend="guc-log-min-duration-statement"/> is truncated to
+        at most this many bytes.
+        A value of zero causes statements to be logged with an empty body.
+        <literal>-1</literal> (the default) logs statements in full.
+        If this value is specified without units, it is taken as bytes.
+        Only superusers and users with the appropriate <literal>SET</literal>
+        privilege can change this setting.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-log-replication-commands" xreflabel="log_replication_commands">
       <term><varname>log_replication_commands</varname> (<type>boolean</type>)
       <indexterm>
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index aeaf1c6db8f..fcff42b4a4f 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -1083,11 +1083,20 @@ exec_simple_query(const char *query_string)
 	/* Log immediately if dictated by log_statement */
 	if (check_log_statement(parsetree_list))
 	{
+		char	   *truncated_stmt = NULL;
+
+		if (log_statement_max_length >= 0)
+			truncated_stmt = truncate_query_log(query_string);
+
 		ereport(LOG,
-				(errmsg("statement: %s", query_string),
+				(errmsg("statement: %s",
+						(truncated_stmt != NULL) ? truncated_stmt : query_string),
 				 errhidestmt(true),
 				 errdetail_execute(parsetree_list)));
 		was_logged = true;
+
+		if (truncated_stmt != NULL)
+			pfree(truncated_stmt);
 	}
 
 	/*
@@ -1380,12 +1389,23 @@ exec_simple_query(const char *query_string)
 					 errhidestmt(true)));
 			break;
 		case 2:
-			ereport(LOG,
-					(errmsg("duration: %s ms  statement: %s",
-							msec_str, query_string),
-					 errhidestmt(true),
-					 errdetail_execute(parsetree_list)));
-			break;
+			{
+				char	   *truncated_stmt = NULL;
+
+				if (log_statement_max_length >= 0)
+					truncated_stmt = truncate_query_log(query_string);
+
+				ereport(LOG,
+						(errmsg("duration: %s ms  statement: %s",
+								msec_str,
+								(truncated_stmt != NULL) ? truncated_stmt : query_string),
+						 errhidestmt(true),
+						 errdetail_execute(parsetree_list)));
+
+				if (truncated_stmt != NULL)
+					pfree(truncated_stmt);
+				break;
+			}
 	}
 
 	if (save_log_statement_stats)
@@ -1615,13 +1635,23 @@ exec_parse_message(const char *query_string,	/* string to execute */
 					 errhidestmt(true)));
 			break;
 		case 2:
-			ereport(LOG,
-					(errmsg("duration: %s ms  parse %s: %s",
-							msec_str,
-							*stmt_name ? stmt_name : "<unnamed>",
-							query_string),
-					 errhidestmt(true)));
-			break;
+			{
+				char	   *truncated_stmt = NULL;
+
+				if (log_statement_max_length >= 0)
+					truncated_stmt = truncate_query_log(query_string);
+
+				ereport(LOG,
+						(errmsg("duration: %s ms  parse %s: %s",
+								msec_str,
+								*stmt_name ? stmt_name : "<unnamed>",
+								(truncated_stmt != NULL) ? truncated_stmt : query_string),
+						 errhidestmt(true)));
+
+				if (truncated_stmt != NULL)
+					pfree(truncated_stmt);
+				break;
+			}
 	}
 
 	if (save_log_statement_stats)
@@ -2092,16 +2122,26 @@ exec_bind_message(StringInfo input_message)
 					 errhidestmt(true)));
 			break;
 		case 2:
-			ereport(LOG,
-					(errmsg("duration: %s ms  bind %s%s%s: %s",
-							msec_str,
-							*stmt_name ? stmt_name : "<unnamed>",
-							*portal_name ? "/" : "",
-							*portal_name ? portal_name : "",
-							psrc->query_string),
-					 errhidestmt(true),
-					 errdetail_params(params)));
-			break;
+			{
+				char	   *truncated_stmt = NULL;
+
+				if (log_statement_max_length >= 0)
+					truncated_stmt = truncate_query_log(psrc->query_string);
+
+				ereport(LOG,
+						(errmsg("duration: %s ms  bind %s%s%s: %s",
+								msec_str,
+								*stmt_name ? stmt_name : "<unnamed>",
+								*portal_name ? "/" : "",
+								*portal_name ? portal_name : "",
+								(truncated_stmt != NULL) ? truncated_stmt : psrc->query_string),
+						 errhidestmt(true),
+						 errdetail_params(params)));
+
+				if (truncated_stmt != NULL)
+					pfree(truncated_stmt);
+				break;
+			}
 	}
 
 	if (save_log_statement_stats)
@@ -2240,6 +2280,11 @@ exec_execute_message(const char *portal_name, long max_rows)
 	/* Log immediately if dictated by log_statement */
 	if (check_log_statement(portal->stmts))
 	{
+		char	   *truncated_source = NULL;
+
+		if (log_statement_max_length >= 0)
+			truncated_source = truncate_query_log(sourceText);
+
 		ereport(LOG,
 				(errmsg("%s %s%s%s: %s",
 						execute_is_fetch ?
@@ -2248,10 +2293,13 @@ exec_execute_message(const char *portal_name, long max_rows)
 						prepStmtName,
 						*portal_name ? "/" : "",
 						*portal_name ? portal_name : "",
-						sourceText),
+						(truncated_source != NULL) ? truncated_source : sourceText),
 				 errhidestmt(true),
 				 errdetail_params(portalParams)));
 		was_logged = true;
+
+		if (truncated_source != NULL)
+			pfree(truncated_source);
 	}
 
 	/*
@@ -2363,19 +2411,29 @@ exec_execute_message(const char *portal_name, long max_rows)
 					 errhidestmt(true)));
 			break;
 		case 2:
-			ereport(LOG,
-					(errmsg("duration: %s ms  %s %s%s%s: %s",
-							msec_str,
-							execute_is_fetch ?
-							_("execute fetch from") :
-							_("execute"),
-							prepStmtName,
-							*portal_name ? "/" : "",
-							*portal_name ? portal_name : "",
-							sourceText),
-					 errhidestmt(true),
-					 errdetail_params(portalParams)));
-			break;
+			{
+				char	   *truncated_source = NULL;
+
+				if (log_statement_max_length >= 0)
+					truncated_source = truncate_query_log(sourceText);
+
+				ereport(LOG,
+						(errmsg("duration: %s ms  %s %s%s%s: %s",
+								msec_str,
+								execute_is_fetch ?
+								_("execute fetch from") :
+								_("execute"),
+								prepStmtName,
+								*portal_name ? "/" : "",
+								*portal_name ? portal_name : "",
+								(truncated_source != NULL) ? truncated_source : sourceText),
+						 errhidestmt(true),
+						 errdetail_params(portalParams)));
+
+				if (truncated_source != NULL)
+					pfree(truncated_source);
+				break;
+			}
 	}
 
 	if (save_log_statement_stats)
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index c270c62e213..b9f950b5d8e 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -4235,6 +4235,41 @@ write_stderr(const char *fmt,...)
 }
 
 
+/*
+ * truncate_query_log - truncate query string if needed for logging
+ *
+ * Returns a palloc'd truncated copy if truncation is needed,
+ * or NULL if no truncation is required.
+ */
+char *
+truncate_query_log(const char *query)
+{
+	size_t		query_len;
+	size_t		truncated_len;
+	char	   *truncated_query;
+
+	/* Truncation is disabled when the limit is negative */
+	if (!query || log_statement_max_length < 0)
+		return NULL;
+
+	query_len = strlen(query);
+
+	/*
+	 * No need to allocate a truncated copy if the query is shorter
+	 * than log_statement_max_length.
+	 */
+	if (query_len <= (size_t) log_statement_max_length)
+		return NULL;
+
+	/* Truncate at a multibyte character boundary */
+	truncated_len = pg_mbcliplen(query, query_len, log_statement_max_length);
+	truncated_query = (char *) palloc(truncated_len + 1);
+	memcpy(truncated_query, query, truncated_len);
+	truncated_query[truncated_len] = '\0';
+
+	return truncated_query;
+}
+
 /*
  * Write errors to stderr (or by equal means when stderr is
  * not available) - va_list version
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index 83af594d4af..f797ef03b4e 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -1868,6 +1868,16 @@
   options => 'log_statement_options',
 },
 
+{ name => 'log_statement_max_length', type => 'int', context => 'PGC_SUSET', group => 'LOGGING_WHAT',
+  short_desc => 'Sets the maximum length in bytes of logged statements.',
+  long_desc => '-1 means log statement in full.',
+  flags => 'GUC_UNIT_BYTE',
+  variable => 'log_statement_max_length',
+  boot_val => '-1',
+  min => '-1',
+  max => 'INT_MAX / 2',
+},
+
 { name => 'log_statement_sample_rate', type => 'real', context => 'PGC_SUSET', group => 'LOGGING_WHEN',
   short_desc => 'Fraction of statements exceeding "log_min_duration_sample" to be logged.',
   long_desc => 'Use a value between 0.0 (never log) and 1.0 (always log).',
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 290ccbc543e..90aa374b3ec 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -571,6 +571,7 @@ int			log_min_duration_statement = -1;
 int			log_parameter_max_length = -1;
 int			log_parameter_max_length_on_error = 0;
 int			log_temp_files = -1;
+int			log_statement_max_length = -1;
 double		log_statement_sample_rate = 1.0;
 double		log_xact_sample_rate = 0;
 char	   *backtrace_functions;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index ac38cddaaf9..3bf38357f6c 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -675,6 +675,8 @@
                                         # bind-parameter values to N bytes;
                                         # -1 means print in full, 0 disables
 #log_statement = 'none'                 # none, ddl, mod, all
+#log_statement_max_length = -1          # max length of logged statements
+                                        # -1 means log statement in full
 #log_replication_commands = off
 #log_temp_files = -1                    # log temporary files equal or larger
                                         # than the specified size in kilobytes;
diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h
index 440a02dd147..92753b08b67 100644
--- a/src/include/utils/elog.h
+++ b/src/include/utils/elog.h
@@ -509,6 +509,7 @@ extern void log_status_format(StringInfo buf, const char *format,
 extern void DebugFileOpen(void);
 extern char *unpack_sql_state(int sql_state);
 extern bool in_error_recursion_trouble(void);
+extern char *truncate_query_log(const char *query);
 
 /* Common functions shared across destinations */
 extern void reset_formatted_start_time(void);
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index dc406d6651a..8057d7870ad 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -300,6 +300,7 @@ extern PGDLLIMPORT int client_min_messages;
 extern PGDLLIMPORT int log_min_duration_sample;
 extern PGDLLIMPORT int log_min_duration_statement;
 extern PGDLLIMPORT int log_temp_files;
+extern PGDLLIMPORT int log_statement_max_length;
 extern PGDLLIMPORT double log_statement_sample_rate;
 extern PGDLLIMPORT double log_xact_sample_rate;
 extern PGDLLIMPORT char *backtrace_functions;
diff --git a/src/test/modules/test_misc/meson.build b/src/test/modules/test_misc/meson.build
index 1b25d98f7f3..1836264fd8b 100644
--- a/src/test/modules/test_misc/meson.build
+++ b/src/test/modules/test_misc/meson.build
@@ -20,6 +20,7 @@ tests += {
       't/009_log_temp_files.pl',
       't/010_index_concurrently_upsert.pl',
       't/011_lock_stats.pl',
+      't/012_log_statement_max_length.pl',
     ],
     # The injection points are cluster-wide, so disable installcheck
     'runningcheck': false,
diff --git a/src/test/modules/test_misc/t/012_log_statement_max_length.pl b/src/test/modules/test_misc/t/012_log_statement_max_length.pl
new file mode 100644
index 00000000000..d49936d7a66
--- /dev/null
+++ b/src/test/modules/test_misc/t/012_log_statement_max_length.pl
@@ -0,0 +1,96 @@
+# Copyright (c) 2026, PostgreSQL Global Development Group
+
+# Test log_statement_max_length GUC: verifies that logged statement text is
+# truncated at the specified byte limit, respecting multibyte boundaries, for
+# both log_statement and log_min_duration_statement logging.
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $node = PostgreSQL::Test::Cluster->new('primary');
+$node->init();
+$node->append_conf('postgresql.conf', "log_statement = 'all'\n");
+$node->start;
+
+# Verify truncation works with ASCII.  The query is 24 bytes; with
+# log_statement_max_length = 20 it must be cut after the 20th byte ('C')
+# and must NOT contain the 21st character ('D').
+note "ASCII truncation via log_statement";
+$node->append_conf('postgresql.conf', "log_statement_max_length = 20\n");
+$node->reload();
+my $log_offset = -s $node->logfile;
+$node->psql('postgres', "SELECT '123456789ABCDEF'");
+ok( $node->log_contains(
+		qr/statement: SELECT '123456789ABC(?!D)/, $log_offset),
+	"ASCII query truncated at 20 bytes");
+
+# Verify -1 logs statement in full (closing quote must be present).
+note "-1 logs statement in full";
+$node->append_conf('postgresql.conf', "log_statement_max_length = -1\n");
+$node->reload();
+$log_offset = -s $node->logfile;
+$node->psql('postgres', "SELECT '123456789ABCDEF'");
+ok( $node->log_contains(
+		qr/statement: SELECT '123456789ABCDEF'/, $log_offset),
+	"-1 logs full query");
+
+# Verify multibyte character handling: truncation must not split a multibyte
+# character (the 🐘 emoji is 4 bytes; with limit=12 it must be kept whole
+# and the following 't' must not appear).
+note "Multibyte truncation respects character boundaries";
+$node->append_conf('postgresql.conf', "log_statement_max_length = 12\n");
+$node->reload();
+$log_offset = -s $node->logfile;
+$node->psql('postgres', "SELECT '\xF0\x9F\x90\x98test'");
+ok( $node->log_contains(
+		qr/SELECT '\xF0\x9F\x90\x98(?!t)/, $log_offset),
+	"multibyte truncation at character boundary");
+
+# Verify truncation via the extended query protocol (execute message).
+# Same 24-byte query truncated to 20 bytes; the 21st character ('D') must
+# not appear.
+note "Extended query protocol (execute) truncation";
+$node->append_conf('postgresql.conf', "log_statement_max_length = 20\n");
+$node->reload();
+$log_offset = -s $node->logfile;
+$node->psql('postgres', "SELECT '123456789ABCDEF' \\bind \\g");
+ok( $node->log_contains(
+		qr/execute <unnamed>: SELECT '123456789ABC(?!D)/, $log_offset),
+	"extended protocol execute truncated at 20 bytes");
+
+# Verify extended protocol also respects -1 (no truncation; closing quote
+# present).
+note "Extended query protocol with -1 (no truncation)";
+$node->append_conf('postgresql.conf', "log_statement_max_length = -1\n");
+$node->reload();
+$log_offset = -s $node->logfile;
+$node->psql('postgres', "SELECT '123456789ABCDEF' \\bind \\g");
+ok( $node->log_contains(
+		qr/execute <unnamed>: SELECT '123456789ABCDEF'/, $log_offset),
+	"extended protocol -1 logs full query");
+
+# Verify truncation applies to the parse/bind/execute duration log entries
+# emitted by log_min_duration_statement.  log_statement must be 'none' to
+# ensure the duration entries include the statement text.
+note "Duration logging via log_min_duration_statement";
+$node->append_conf('postgresql.conf',
+	"log_statement = 'none'\nlog_min_duration_statement = 0\nlog_statement_max_length = 20\n"
+);
+$node->reload();
+$log_offset = -s $node->logfile;
+$node->psql('postgres', "SELECT '123456789ABCDEF' \\bind \\g");
+ok( $node->log_contains(
+		qr/parse <unnamed>: SELECT '123456789ABC(?!D)/, $log_offset),
+	"parse duration entry truncated");
+ok( $node->log_contains(
+		qr/bind <unnamed>: SELECT '123456789ABC(?!D)/, $log_offset),
+	"bind duration entry truncated");
+ok( $node->log_contains(
+		qr/execute <unnamed>: SELECT '123456789ABC(?!D)/, $log_offset),
+	"execute duration entry truncated");
+
+$node->stop;
+done_testing();
-- 
2.43.0

