From 0e8345863eabb9ada61fbe3c9c03f393682a2615 Mon Sep 17 00:00:00 2001
From: Aleksander Alekseev <aleksander@tigerdata.com>
Date: Tue, 12 May 2026 15:09:36 +0300
Subject: [PATCH v1] Decipher the comment in tsrank.c

---
 src/backend/utils/adt/tsrank.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index d35e5528d0a..383ad393971 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -336,13 +336,33 @@ calc_rank_or(const float *w, TSVector t, TSQuery q)
 					jm = j;
 				}
 			}
-/*
-			limit (sum(1/i^2),i=1,inf) = pi^2/6
-			resj = sum(wi/i^2),i=1,noccurrence,
-			wi - should be sorted desc,
-			don't sort for now, just choose maximum weight. This should be corrected
-			Oleg Bartunov
-*/
+
+			/*
+			 * The ideal score for a term is the weighted sum of inverse squares:
+			 *
+			 * resj = sum(wi / i^2, i = 1..noccurrences)
+			 *
+			 * where wi is the weight of the i-th occurrence and weights are
+			 * sorted in descending order so that the highest-weight
+			 * occurrence gets the smallest divisor (i=1) and thus contributes
+			 * the most.
+			 *
+			 * The result is divided by pi^2/6 ~= 1.64493406685, which is the
+			 * limit of sum(1/i^2, i=1..inf). Provided every weight is in
+			 * [0, 1], this keeps the normalized ideal score within [0, 1].
+			 *
+			 * As an approximation for efficiency, we skip the sort and
+			 * instead only promote the single highest-weight occurrence to
+			 * position i=1. This is done by taking the raw (unsorted) sum
+			 * resj, subtracting the maximum weight's actual contribution
+			 * wjm/(jm+1)^2, and adding back its corrected contribution
+			 * wjm/1^2 = wjm:
+			 *
+			 * adjusted = resj - wjm/(jm+1)^2 + wjm, written in the code below
+			 * as wjm + resj - wjm/(jm+1)^2.
+			 *
+			 * All other occurrences, even one at i=1, keep their divisors.
+			 */
 			res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
 
 			entry++;
-- 
2.43.0

