From df78d0bad95eb1572826a61bde6f4d6a7e1b639d Mon Sep 17 00:00:00 2001
From: Evdokimov Ilia <ilya.evdokimov@tantorlabs.com>
Date: Sun, 3 May 2026 16:49:11 +0300
Subject: [PATCH v6 1/2] ANALYZE: use cursor eviction for distinct-value
 tracking

compute_distinct_stats() currently maintains the count=1 region by
shifting entries in track[] whenever a new singleton is inserted.
That makes each replacement O(n) in the size of the singleton region.

Replace that with a round-robin cursor over the count=1 region. This
preserves FIFO eviction order for singleton candidates while making
singleton replacement O(1) and avoiding the repeated array shifts
needed by the old scheme.
---
 src/backend/commands/analyze.c | 79 ++++++++++++++++++++++++++--------
 1 file changed, 60 insertions(+), 19 deletions(-)

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 020a5919b84..262adf41b24 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -56,7 +56,6 @@
 #include "utils/syscache.h"
 #include "utils/timestamp.h"
 
-
 /* Per-index data for ANALYZE */
 typedef struct AnlIndexData
 {
@@ -2108,10 +2107,11 @@ compute_trivial_stats(VacAttrStatsP stats,
  *
  *	The most common values are determined by brute force: we keep a list
  *	of previously seen values, ordered by number of times seen, as we scan
- *	the samples.  A newly seen value is inserted just after the last
- *	multiply-seen value, causing the bottommost (oldest) singly-seen value
- *	to drop off the list.  The accuracy of this method, and also its cost,
- *	depend mainly on the length of the list we are willing to keep.
+ *	the samples.  Newly seen values are appended to the list, and when it's
+ *	full we replace the oldest singly-seen value (FIFO) using a round-robin
+ *	cursor (clock hand) over the count=1 region.  This avoids repeatedly
+ *	shifting the count=1 region, and when hashing is enabled, avoids having
+ *	to update a large number of hash->index mappings.
  */
 static void
 compute_distinct_stats(VacAttrStatsP stats,
@@ -2138,6 +2138,8 @@ compute_distinct_stats(VacAttrStatsP stats,
 	int			track_cnt,
 				track_max;
 	int			num_mcv = stats->attstattarget;
+	int			firstcount1 = 0,	/* index of first singleton in track[] */
+				c1_cursor = 0;	/* next singleton to evict (FIFO) */
 	StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
 
 	/*
@@ -2156,7 +2158,7 @@ compute_distinct_stats(VacAttrStatsP stats,
 		Datum		value;
 		bool		isnull;
 		bool		match;
-		int			firstcount1,
+		int			match_index,
 					j;
 
 		vacuum_delay_point(true);
@@ -2213,6 +2215,7 @@ compute_distinct_stats(VacAttrStatsP stats,
 											   value, track[j].value)))
 			{
 				match = true;
+				match_index = j;
 				break;
 			}
 			if (j < firstcount1 && track[j].count == 1)
@@ -2221,31 +2224,69 @@ compute_distinct_stats(VacAttrStatsP stats,
 
 		if (match)
 		{
+			bool		was_count1;
+
 			/* Found a match */
-			track[j].count++;
+			was_count1 = (track[match_index].count == 1);
+			track[match_index].count++;
 			/* This value may now need to "bubble up" in the track list */
-			while (j > 0 && track[j].count > track[j - 1].count)
+			for (j = match_index; j > 0 && track[j].count > track[j - 1].count; j--)
 			{
 				swapDatum(track[j].value, track[j - 1].value);
 				swapInt(track[j].count, track[j - 1].count);
-				j--;
 			}
+
+			/*
+			 * If a singleton was promoted, the singleton region shrinks by
+			 * one element, so advance its boundary.
+			 */
+			if (was_count1)
+			{
+				firstcount1++;
+
+				/*
+				 * If the cursor was pointing into the singleton region,
+				 * advance it so it still refers to a valid singleton.
+				 */
+				if (c1_cursor >= firstcount1 - 1)
+				{
+					c1_cursor++;
+					if (c1_cursor >= track_cnt)
+						c1_cursor = firstcount1;
+				}
+			}
+
+			/* Final safety: keep cursor inside singleton region */
+			if (c1_cursor < firstcount1 || c1_cursor >= track_cnt)
+				c1_cursor = firstcount1;
 		}
 		else
 		{
-			/* No match.  Insert at head of count-1 list */
+			int			insert_index;
+
+			/*
+			 * No match.  Track the value if we still have room; otherwise
+			 * evict the oldest singleton from the count=1 region.
+			 */
 			if (track_cnt < track_max)
-				track_cnt++;
-			for (j = track_cnt - 1; j > firstcount1; j--)
-			{
-				track[j].value = track[j - 1].value;
-				track[j].count = track[j - 1].count;
-			}
-			if (firstcount1 < track_cnt)
+				insert_index = track_cnt++;
+			else if (firstcount1 < track_cnt)
 			{
-				track[firstcount1].value = value;
-				track[firstcount1].count = 1;
+				/*
+				 * Use c1_cursor as a round-robin cursor over the count=1
+				 * region.  Keep it on a current singleton before evicting.
+				 */
+				if (c1_cursor < firstcount1 || c1_cursor >= track_cnt)
+					c1_cursor = firstcount1;
+				insert_index = c1_cursor++;
+				if (c1_cursor >= track_cnt)
+					c1_cursor = firstcount1;
 			}
+			else
+				continue;
+
+			track[insert_index].value = value;
+			track[insert_index].count = 1;
 		}
 	}
 
-- 
2.34.1

