From 72ce1762eb215ce5e047d6c470cfc24e0752e2c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Herrera?= <alvherre@kurilemu.de>
Date: Tue, 28 Oct 2025 11:48:12 +0100
Subject: [PATCH v5] pg_stat_statements: Fix handling of duplicate constant
 locations

Two or more constants can have the same location.  We handled this
correctly for non-squashed constants, but failed to do so for squashed
ones (resulting in out-of-bounds memory access), because commit
0f65f3eec478 broke the code structure: we failed to update 'last_loc'
correctly when skipping these squashed constants.

The simplest fix seems to be to get rid of 'last_loc' altogether -- in
hindsight, it's quite pointless.  Also, when ignoring a constant because
it duplicates the previous location, make sure to fulfill
fill_in_constant_lengths's duty of setting its length to -1.

Lastly, we can use == instead of <= because the locations have been
sorted beforehand, so the < case cannot arise.

Co-authored-by: Sami Imseih <samimseih@gmail.com>
Co-authored-by: Dmitry Dolgov <9erthalion6@gmail.com>
Reported-by: Konstantin Knizhnik <knizhnik@garret.ru>
Discussion: https://www.postgresql.org/message-id/2b91e358-0d99-43f7-be44-d2d4dbce37b3%40garret.ru
---
 .../pg_stat_statements/expected/squashing.out | 80 +++++++++++++++++++
 .../pg_stat_statements/pg_stat_statements.c   | 31 +++----
 contrib/pg_stat_statements/sql/squashing.sql  | 26 ++++++
 3 files changed, 122 insertions(+), 15 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out
index f952f47ef7b..d5bb67c7222 100644
--- a/contrib/pg_stat_statements/expected/squashing.out
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -809,6 +809,84 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
  select where $1 IN ($2 /*, ... */)                 |     2
 (2 rows)
 
+-- composite function with row expansion
+create table test_composite(x integer);
+CREATE FUNCTION composite_f(a integer[], out x integer, out y integer) returns
+record as $$            begin
+        x = a[1];
+        y = a[2];
+    end;
+$$ language plpgsql;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ((composite_f(array[1, 2]))).* FROM test_composite;
+ x | y 
+---+---
+(0 rows)
+
+SELECT ((composite_f(array[1, 2, 3]))).* FROM test_composite;
+ x | y 
+---+---
+(0 rows)
+
+SELECT ((composite_f(array[1, 2, 3]))).*, 1, 2, 3, ((composite_f(array[1, 2, 3]))).*, 1, 2
+FROM test_composite
+WHERE x IN (1, 2, 3);
+ x | y | ?column? | ?column? | ?column? | x | y | ?column? | ?column? 
+---+---+----------+----------+----------+---+---+----------+----------
+(0 rows)
+
+SELECT ((composite_f(array[1, $1, 3]))).*, 1 FROM test_composite \bind 1
+;
+ x | y | ?column? 
+---+---+----------
+(0 rows)
+
+-- ROW() expression with row expansion
+SELECT (ROW(ARRAY[1,2])).*;
+  f1   
+-------
+ {1,2}
+(1 row)
+
+SELECT (ROW(ARRAY[1, 2], ARRAY[1, 2, 3])).*;
+  f1   |   f2    
+-------+---------
+ {1,2} | {1,2,3}
+(1 row)
+
+SELECT 1, 2, (ROW(ARRAY[1, 2], ARRAY[1, 2, 3])).*, 3, 4;
+ ?column? | ?column? |  f1   |   f2    | ?column? | ?column? 
+----------+----------+-------+---------+----------+----------
+        1 |        2 | {1,2} | {1,2,3} |        3 |        4
+(1 row)
+
+SELECT (ROW(ARRAY[1, 2], ARRAY[1, $1, 3])).*, 1 \bind 1
+;
+  f1   |   f2    | ?column? 
+-------+---------+----------
+ {1,2} | {1,1,3} |        1
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                                    query                                                    | calls 
+-------------------------------------------------------------------------------------------------------------+-------
+ SELECT $1, $2, (ROW(ARRAY[$3 /*, ... */], ARRAY[$4 /*, ... */])).*, $5, $6                                  |     1
+ SELECT ((composite_f(array[$1 /*, ... */]))).* FROM test_composite                                          |     2
+ SELECT ((composite_f(array[$1 /*, ... */]))).*, $2 FROM test_composite                                      |     1
+ SELECT ((composite_f(array[$1 /*, ... */]))).*, $2, $3, $4, ((composite_f(array[$5 /*, ... */]))).*, $6, $7+|     1
+ FROM test_composite                                                                                        +| 
+ WHERE x IN ($8 /*, ... */)                                                                                  | 
+ SELECT (ROW(ARRAY[$1 /*, ... */])).*                                                                        |     1
+ SELECT (ROW(ARRAY[$1 /*, ... */], ARRAY[$2 /*, ... */])).*                                                  |     1
+ SELECT (ROW(ARRAY[$1 /*, ... */], ARRAY[$2 /*, ... */])).*, $3                                              |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                                                          |     1
+(8 rows)
+
 --
 -- cleanup
 --
@@ -818,3 +896,5 @@ DROP TABLE test_squash_numeric;
 DROP TABLE test_squash_bigint;
 DROP TABLE test_squash_cast CASCADE;
 DROP TABLE test_squash_jsonb;
+DROP TABLE test_composite;
+DROP FUNCTION composite_f;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index f2187167c5c..06a1de97c61 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2954,9 +2954,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
  * have originated from within the authoritative parser, this should not be
  * a problem.
  *
- * Duplicate constant pointers are possible, and will have their lengths
- * marked as '-1', so that they are later ignored.  (Actually, we assume the
- * lengths were initialized as -1 to start with, and don't change them here.)
+ * Multiple constants can have the same location.  We reset lengths of those
+ * past the first to -1 so that they can later be ignored.
  *
  * If query_loc > 0, then "query" has been advanced by that much compared to
  * the original string start, so we need to translate the provided locations
@@ -2976,8 +2975,6 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query,
 	core_yy_extra_type yyextra;
 	core_YYSTYPE yylval;
 	YYLTYPE		yylloc;
-	int			last_loc = -1;
-	int			i;
 
 	/*
 	 * Sort the records by location so that we can process them in order while
@@ -2998,23 +2995,29 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query,
 	yyextra.escape_string_warning = false;
 
 	/* Search for each constant, in sequence */
-	for (i = 0; i < jstate->clocations_count; i++)
+	for (int i = 0; i < jstate->clocations_count; i++)
 	{
 		int			loc = locs[i].location;
 		int			tok;
 
-		/* Adjust recorded location if we're dealing with partial string */
-		loc -= query_loc;
-
-		Assert(loc >= 0);
+		/* Ignore constants after the first one in the same location */
+		if (i > 0 && loc == locs[i - 1].location)
+		{
+			locs[i].length = -1;
+			continue;
+		}
 
 		if (locs[i].squashed)
 			continue;			/* squashable list, ignore */
 
-		if (loc <= last_loc)
-			continue;			/* Duplicate constant, ignore */
+		/* Adjust recorded location if we're dealing with partial string */
+		loc -= query_loc;
+		Assert(loc >= 0);
 
-		/* Lex tokens until we find the desired constant */
+		/*
+		 * We have a valid location for a constant that's not a dupe.  Lex
+		 * tokens until we find the desired constant.
+		 */
 		for (;;)
 		{
 			tok = core_yylex(&yylval, &yylloc, yyscanner);
@@ -3060,8 +3063,6 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query,
 		/* If we hit end-of-string, give up, leaving remaining lengths -1 */
 		if (tok == 0)
 			break;
-
-		last_loc = loc;
 	}
 
 	scanner_finish(yyscanner);
diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql
index 53138d125a9..03b0515f872 100644
--- a/contrib/pg_stat_statements/sql/squashing.sql
+++ b/contrib/pg_stat_statements/sql/squashing.sql
@@ -291,6 +291,30 @@ select where '1' IN ('1'::int::text, '2'::int::text);
 select where '1' = ANY (array['1'::int::text, '2'::int::text]);
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
+-- composite function with row expansion
+create table test_composite(x integer);
+CREATE FUNCTION composite_f(a integer[], out x integer, out y integer) returns
+record as $$            begin
+        x = a[1];
+        y = a[2];
+    end;
+$$ language plpgsql;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ((composite_f(array[1, 2]))).* FROM test_composite;
+SELECT ((composite_f(array[1, 2, 3]))).* FROM test_composite;
+SELECT ((composite_f(array[1, 2, 3]))).*, 1, 2, 3, ((composite_f(array[1, 2, 3]))).*, 1, 2
+FROM test_composite
+WHERE x IN (1, 2, 3);
+SELECT ((composite_f(array[1, $1, 3]))).*, 1 FROM test_composite \bind 1
+;
+-- ROW() expression with row expansion
+SELECT (ROW(ARRAY[1,2])).*;
+SELECT (ROW(ARRAY[1, 2], ARRAY[1, 2, 3])).*;
+SELECT 1, 2, (ROW(ARRAY[1, 2], ARRAY[1, 2, 3])).*, 3, 4;
+SELECT (ROW(ARRAY[1, 2], ARRAY[1, $1, 3])).*, 1 \bind 1
+;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
 --
 -- cleanup
 --
@@ -300,3 +324,5 @@ DROP TABLE test_squash_numeric;
 DROP TABLE test_squash_bigint;
 DROP TABLE test_squash_cast CASCADE;
 DROP TABLE test_squash_jsonb;
+DROP TABLE test_composite;
+DROP FUNCTION composite_f;
-- 
2.47.3

