From a321ef26d53b47b92ef79a08856037c3d61ca532 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@2ndquadrant.com>
Date: Thu, 5 Sep 2024 22:37:17 +0200
Subject: [PATCH v20240906 5/5] WIP: batching for sp-gist indexes

---
 src/backend/access/spgist/spgscan.c  | 142 +++++++++++++++++++++++++++
 src/backend/access/spgist/spgutils.c |   1 +
 src/include/access/spgist.h          |   1 +
 src/include/access/spgist_private.h  |  15 +++
 src/tools/pgindent/typedefs.list     |   1 +
 5 files changed, 160 insertions(+)

diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c
index 03293a7816e..8a6a6059af5 100644
--- a/src/backend/access/spgist/spgscan.c
+++ b/src/backend/access/spgist/spgscan.c
@@ -1079,6 +1079,148 @@ spggettuple(IndexScanDesc scan, ScanDirection dir)
 	return false;
 }
 
+static void
+AssertCheckSpGistBatchInfo(SpGistScanOpaque so)
+{
+#ifdef USE_ASSERT_CHECKING
+	/* batch is a non-empty range of valid indexes into the nPtrs items */
+	Assert(0 <= so->batch.firstIndex);
+	Assert(so->batch.firstIndex <= so->batch.lastIndex);
+	Assert(so->batch.lastIndex < so->nPtrs);
+#endif
+}
+
+/*
+ * Copy items [start, end] (inclusive) of so's result arrays into the batch.
+ *
+ * FIXME Does this need to worry about recheck/recheckDistances flags in
+ * SpGistScanOpaque? Probably yes.
+ *
+ * FIXME Definitely should return recontup for IOS, but that needs changes
+ * to index_batch_add.
+ */
+static void
+_spgist_copy_batch(IndexScanDesc scan, SpGistScanOpaque so,
+				   int start, int end)
+{
+	/*
+	 * We're only called with a non-empty range of items (spggetbatch checks
+	 * that before calling us). So we should never get into situation with
+	 * empty start/end range. In the worst case, there is just a single item,
+	 * in which case (start == end).
+	 */
+	Assert(start <= end);
+
+	/* The range of items should fit into the current batch size. */
+	Assert((end - start + 1) <= scan->xs_batch->currSize);
+
+	so->batch.firstIndex = start;
+	so->batch.lastIndex = end;
+
+	AssertCheckSpGistBatchInfo(so);
+
+	/*
+	 * Walk through the range of index tuples, copy them into the batch. If
+	 * requested, set the index tuple too. We don't know if the batch is full
+	 * already - we just try to add it, and bail out if it fails.
+	 *
+	 * FIXME This seems wrong, actually. We use currSize when calculating the
+	 * start/end range, so the add should always succeed.
+	 */
+	while (start <= end)
+	{
+		bool		recheck = so->recheck[start];
+		HeapTuple	htup = NULL;
+
+		if (so->want_itup)
+			htup = so->reconTups[start];
+
+		/*
+		 * Use the distances of the item being copied. so->iPtr can't be used
+		 * here, the caller already advanced it past the end of this batch.
+		 */
+		if (so->numberOfOrderBys > 0)
+			index_store_float8_orderby_distances(scan, so->orderByTypes,
+												 so->distances[start],
+												 so->recheckDistances[start]);
+
+		/* try to add it to batch, if there's space */
+		if (!index_batch_add(scan, so->heapPtrs[start], recheck, NULL, htup))
+			break;
+
+		start++;
+	}
+
+	/* set the starting point (XXX might be better done in indexam.c) */
+	scan->xs_batch->currIndex = -1;
+
+	/* shouldn't be possible to end here with an empty batch */
+	Assert(scan->xs_batch->nheaptids > 0);
+}
+
+
+/*
+ * spggetbatch() -- load the next batch of matching items for the scan.
+ *
+ * Returns true once a batch was copied, false when spgWalk finds no items.
+ */
+bool
+spggetbatch(IndexScanDesc scan, ScanDirection dir)
+{
+	SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque;
+
+	if (dir != ForwardScanDirection)
+		elog(ERROR, "SP-GiST only supports forward scan direction");
+
+	/* Stash want_itup in *so, so that it need not be passed around */
+	so->want_itup = scan->xs_want_itup;
+
+	for (;;)
+	{
+		/* forward scans only: the next batch starts after the previous one */
+		int			firstItem = so->batch.lastIndex + 1;
+		int			lastItem = Min(firstItem + (scan->xs_batch->currSize - 1),
+								   so->nPtrs - 1);
+		int			i;
+
+		so->iPtr = lastItem + 1;
+
+		/* still have unreturned items? then copy them and we're done */
+		if (firstItem <= lastItem)
+		{
+			_spgist_copy_batch(scan, so, firstItem, lastItem);
+			return true;
+		}
+
+		/* Must pfree distances to avoid memory leak */
+		if (so->numberOfOrderBys > 0)
+		{
+			for (i = 0; i < so->nPtrs; i++)
+				if (so->distances[i] != NULL)
+					pfree(so->distances[i]);
+		}
+
+		/* Must pfree reconstructed tuples to avoid memory leak */
+		if (so->want_itup)
+		{
+			for (i = 0; i < so->nPtrs; i++)
+				pfree(so->reconTups[i]);
+		}
+		so->nPtrs = 0;
+		so->iPtr = 0;
+		spgWalk(scan->indexRelation, so, false, storeGettuple);
+
+		if (so->nPtrs == 0)
+			break;				/* must have completed scan */
+
+		/* reset before loading data from batch */
+		so->batch.firstIndex = -1;
+		so->batch.lastIndex = -1;
+	}
+
+	return false;
+}
+
 bool
 spgcanreturn(Relation index, int attno)
 {
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 76b80146ff0..fc685ffa2aa 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -84,6 +84,7 @@ spghandler(PG_FUNCTION_ARGS)
 	amroutine->ambeginscan = spgbeginscan;
 	amroutine->amrescan = spgrescan;
 	amroutine->amgettuple = spggettuple;
+	amroutine->amgetbatch = spggetbatch;
 	amroutine->amgetbitmap = spggetbitmap;
 	amroutine->amendscan = spgendscan;
 	amroutine->ammarkpos = NULL;
diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h
index d6a49531200..f879843b3bb 100644
--- a/src/include/access/spgist.h
+++ b/src/include/access/spgist.h
@@ -209,6 +209,7 @@ extern void spgrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 					  ScanKey orderbys, int norderbys);
 extern int64 spggetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
 extern bool spggettuple(IndexScanDesc scan, ScanDirection dir);
+extern bool spggetbatch(IndexScanDesc scan, ScanDirection dir);
 extern bool spgcanreturn(Relation index, int attno);
 
 /* spgvacuum.c */
diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h
index e7cbe10a89b..15a5e77c5d3 100644
--- a/src/include/access/spgist_private.h
+++ b/src/include/access/spgist_private.h
@@ -183,6 +183,19 @@ typedef struct SpGistSearchItem
 #define SizeOfSpGistSearchItem(n_distances) \
 	(offsetof(SpGistSearchItem, distances) + sizeof(double) * (n_distances))
 
+/*
+ * Information about the current batch (in batched index scans)
+ *
+ * XXX Probably not needed, as spgist supports just forward scans, so we
+ * could simply use the iPtr (no problem after change of scan direction).
+ */
+typedef struct SpGistBatchInfo
+{
+	/* Current range of items in a batch (inclusive), or -1 after a reset. */
+	int			firstIndex;
+	int			lastIndex;
+} SpGistBatchInfo;
+
 /*
  * Private state of an index scan
  */
@@ -235,6 +248,8 @@ typedef struct SpGistScanOpaqueData
 	/* distances (for recheck) */
 	IndexOrderByDistance *distances[MaxIndexTuplesPerPage];
 
+	SpGistBatchInfo batch;		/* batch loaded from the index */
+
 	/*
 	 * Note: using MaxIndexTuplesPerPage above is a bit hokey since
 	 * SpGistLeafTuples aren't exactly IndexTuples; however, they are larger,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index ac78d153d4f..fbbdb330fc5 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2702,6 +2702,7 @@ SortSupportData
 SortTuple
 SortTupleComparator
 SortedPoint
+SpGistBatchInfo
 SpGistBuildState
 SpGistCache
 SpGistDeadTuple
-- 
2.46.0

