From 5c86bfd3e83b2bde7706746c59ac148b1553e717 Mon Sep 17 00:00:00 2001
From: David Geier <geidav.pg@gmail.com>
Date: Thu, 23 Apr 2026 11:08:33 +0200
Subject: [PATCH v1 3/3] Use correct collation for comparison

---
 contrib/pg_trgm/Makefile              |  6 +--
 contrib/pg_trgm/meson.build           |  1 +
 contrib/pg_trgm/pg_trgm--1.6--1.7.sql | 20 +++++++++
 contrib/pg_trgm/pg_trgm.control       |  2 +-
 contrib/pg_trgm/trgm_gin.c            | 65 +++++++++++++++++++++++++++
 5 files changed, 90 insertions(+), 4 deletions(-)
 create mode 100644 contrib/pg_trgm/pg_trgm--1.6--1.7.sql

diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile
index 26b3028b75e..556b76f49f2 100644
--- a/contrib/pg_trgm/Makefile
+++ b/contrib/pg_trgm/Makefile
@@ -9,9 +9,9 @@ OBJS = \
 	trgm_regexp.o
 
 EXTENSION = pg_trgm
-DATA = pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
-	pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
-	pg_trgm--1.0--1.1.sql
+DATA = pg_trgm--1.6--1.7.sql pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql \
+	pg_trgm--1.3--1.4.sql pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql \
+	pg_trgm--1.1--1.2.sql pg_trgm--1.0--1.1.sql
 PGFILEDESC = "pg_trgm - trigram matching"
 
 REGRESS = pg_trgm pg_utf8_trgm pg_word_trgm pg_strict_word_trgm pg_trgm_collation
diff --git a/contrib/pg_trgm/meson.build b/contrib/pg_trgm/meson.build
index 5eafa774435..9fdf8c2d07e 100644
--- a/contrib/pg_trgm/meson.build
+++ b/contrib/pg_trgm/meson.build
@@ -28,6 +28,7 @@ install_data(
   'pg_trgm--1.3.sql',
   'pg_trgm--1.4--1.5.sql',
   'pg_trgm--1.5--1.6.sql',
+  'pg_trgm--1.6--1.7.sql',
   'pg_trgm.control',
   kwargs: contrib_data_args,
 )
diff --git a/contrib/pg_trgm/pg_trgm--1.6--1.7.sql b/contrib/pg_trgm/pg_trgm--1.6--1.7.sql
new file mode 100644
index 00000000000..dc5552e375e
--- /dev/null
+++ b/contrib/pg_trgm/pg_trgm--1.6--1.7.sql
@@ -0,0 +1,20 @@
+/* contrib/pg_trgm/pg_trgm--1.6--1.7.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.7'" to load this file. \quit
+
+-- Collation-aware comparison function for the int4 trigram keys of GIN
+CREATE FUNCTION gin_compare_value_trgm(int4, int4)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+-- Swap support function 1 from btint4cmp to gin_compare_value_trgm: drop the
+-- old entry first, then add the new one.  NOTE(review): indexes built with
+-- the old ordering may need a REINDEX after this update -- please confirm.
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin DROP
+        FUNCTION        1 (text, text);
+
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
+        FUNCTION        1 (text, text) gin_compare_value_trgm (int4, int4);
+
diff --git a/contrib/pg_trgm/pg_trgm.control b/contrib/pg_trgm/pg_trgm.control
index 1d6a9ddf259..6e3ee43c510 100644
--- a/contrib/pg_trgm/pg_trgm.control
+++ b/contrib/pg_trgm/pg_trgm.control
@@ -1,6 +1,6 @@
 # pg_trgm extension
 comment = 'text similarity measurement and index searching based on trigrams'
-default_version = '1.6'
+default_version = '1.7'
 module_pathname = '$libdir/pg_trgm'
 relocatable = true
 trusted = true
diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c
index 14a892c657d..dadbc789349 100644
--- a/contrib/pg_trgm/trgm_gin.c
+++ b/contrib/pg_trgm/trgm_gin.c
@@ -5,8 +5,11 @@
 
 #include "access/gin.h"
 #include "access/stratnum.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
 #include "fmgr.h"
 #include "trgm.h"
+#include "utils/pg_locale.h"
 #include "varatt.h"
 
 PG_FUNCTION_INFO_V1(gin_extract_trgm);
@@ -14,6 +17,7 @@ PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
 PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
 PG_FUNCTION_INFO_V1(gin_trgm_consistent);
 PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
+PG_FUNCTION_INFO_V1(gin_compare_value_trgm);
 
 /*
  * This function can only be called if a pre-9.1 version of the GIN operator
@@ -169,6 +173,67 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(entries);
 }
 
+/*
+ * GIN compare support function (support number 1) for trigram keys.
+ *
+ * Keys are trigrams packed into the low 24 bits of an int4.  For a
+ * non-default, non-C collation the three bytes are unpacked and ordered with
+ * pg_strncoll(); otherwise the packed integers are compared directly.
+ *
+ * GIN treats a result of 0 as "same key", so a collation-level tie between
+ * two different trigrams is broken by the integer comparison.
+ *
+ * NOTE(review): trigrams of multibyte characters are presumably stored as
+ * hashes, not text; confirm pg_strncoll() tolerates such bytes for every
+ * collation provider and encoding.
+ */
+Datum
+gin_compare_value_trgm(PG_FUNCTION_ARGS)
+{
+	int32		a = PG_GETARG_INT32(0);
+	int32		b = PG_GETARG_INT32(1);
+	Oid			collid = PG_GET_COLLATION();
+	pg_locale_t	locale = NULL;
+	int			result = 0;
+
+	/* The default collation keeps the plain integer ordering (no lookup). */
+	if (collid != DEFAULT_COLLATION_OID)
+		locale = pg_newlocale_from_collation(collid);
+
+	if (locale != NULL && !locale->collate_is_c && locale->collate)
+	{
+		uint32		ua = (uint32) a;
+		uint32		ub = (uint32) b;
+		char		str_a[3];
+		char		str_b[3];
+
+		/* Unpack most significant byte first; unsigned shifts are portable. */
+		str_a[0] = (char) ((ua >> 16) & 0xFF);
+		str_a[1] = (char) ((ua >> 8) & 0xFF);
+		str_a[2] = (char) (ua & 0xFF);
+
+		str_b[0] = (char) ((ub >> 16) & 0xFF);
+		str_b[1] = (char) ((ub >> 8) & 0xFF);
+		str_b[2] = (char) (ub & 0xFF);
+
+		result = pg_strncoll(str_a, 3, str_b, 3, locale);
+	}
+
+	/*
+	 * Plain integer ordering: the only comparison for C/default collation,
+	 * and the tie-breaker when the collation reports equality.
+	 */
+	if (result == 0)
+	{
+		if (a < b)
+			result = -1;
+		else if (a > b)
+			result = 1;
+	}
+
+	PG_RETURN_INT32(result);
+}
+
 Datum
 gin_trgm_consistent(PG_FUNCTION_ARGS)
 {
-- 
2.51.0

