From d2430256e4ad3638b44c8e6100daf0a6866434e3 Mon Sep 17 00:00:00 2001
From: Mikhail Nikalayeu <mihailnikalayeu@gmail.com>
Date: Sat, 25 Jan 2025 13:33:21 +0100
Subject: [PATCH v25 07/12] Add Datum storage support to tuplestore

 Extend tuplestore to store individual Datum values:
- fixed-length datatypes: store raw bytes without a length header
- variable-length datatypes: include a length header and padding
- by-value types: store inline

This enables using tuplestore for non-tuple data (TIDs) in the next commit.
---
 src/backend/utils/sort/tuplestore.c | 302 ++++++++++++++++++++++------
 src/include/utils/tuplestore.h      |  33 +--
 2 files changed, 263 insertions(+), 72 deletions(-)

diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index c9aecab8d66..38076f3458e 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -1,16 +1,19 @@
 /*-------------------------------------------------------------------------
  *
  * tuplestore.c
- *	  Generalized routines for temporary tuple storage.
+ *	  Generalized routines for temporary storage of tuples and Datums.
+ *
+ * This module handles temporary storage of either tuples or single
+ * Datum values for purposes such as Materialize nodes, hashjoin batch
+ * files, etc. It is essentially a dumbed-down version of tuplesort.c;
+ * it does no sorting of tuples but can only store and regurgitate a sequence
+ * of tuples.  However, because no sort is required, it is allowed to start
+ * reading the sequence before it has all been written.
+ *
+ * This is particularly useful for cursors, because it allows random access
+ * within the already-scanned portion of a query without having to process
+ * the underlying scan to completion.
  *
- * This module handles temporary storage of tuples for purposes such
- * as Materialize nodes, hashjoin batch files, etc.  It is essentially
- * a dumbed-down version of tuplesort.c; it does no sorting of tuples
- * but can only store and regurgitate a sequence of tuples.  However,
- * because no sort is required, it is allowed to start reading the sequence
- * before it has all been written.  This is particularly useful for cursors,
- * because it allows random access within the already-scanned portion of
- * a query without having to process the underlying scan to completion.
  * Also, it is possible to support multiple independent read pointers.
  *
  * A temporary file is used to handle the data if it exceeds the
@@ -61,6 +64,8 @@
 #include "executor/executor.h"
 #include "miscadmin.h"
 #include "storage/buffile.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/resowner.h"
 
@@ -115,16 +120,15 @@ struct Tuplestorestate
 	BufFile    *myfile;			/* underlying file, or NULL if none */
 	MemoryContext context;		/* memory context for holding tuples */
 	ResourceOwner resowner;		/* resowner for holding temp files */
+	Oid			datumType;		/* InvalidOid, or type OID of the stored Datums */
+	int16		datumTypeLen;	/* typlen of that type */
+	bool		datumTypeByVal; /* is that type pass-by-value? */
 
 	/*
 	 * These function pointers decouple the routines that must know what kind
 	 * of tuple we are handling from the routines that don't need to know it.
 	 * They are set up by the tuplestore_begin_xxx routines.
 	 *
-	 * (Although tuplestore.c currently only supports heap tuples, I've copied
-	 * this part of tuplesort.c so that extension to other kinds of objects
-	 * will be easy if it's ever needed.)
-	 *
 	 * Function to copy a supplied input tuple into palloc'd space. (NB: we
 	 * assume that a single pfree() is enough to release the tuple later, so
 	 * the representation must be "flat" in one palloc chunk.) state->availMem
@@ -143,12 +147,18 @@ struct Tuplestorestate
 
 	/*
 	 * Function to read a stored tuple from tape back into memory. 'len' is
-	 * the already-read length of the stored tuple.  Create and return a
-	 * palloc'd copy, and decrease state->availMem by the amount of memory
-	 * space consumed.
+	 * the already-known (read or constant) length of the stored tuple.
+	 * Create and return a palloc'd copy, and decrease state->availMem by the
+	 * amount of memory space consumed.
 	 */
 	void	   *(*readtup) (Tuplestorestate *state, unsigned int len);
 
+	/*
+	 * Function to get the length of the next tuple on tape.  Used to provide
+	 * the 'len' argument for readtup (see above).
+	 */
+	unsigned int(*lentup)(Tuplestorestate *state, bool eofOK);
+
 	/*
 	 * This array holds pointers to tuples in memory if we are in state INMEM.
 	 * In states WRITEFILE and READFILE it's not used.
@@ -185,6 +195,7 @@ struct Tuplestorestate
 #define COPYTUP(state,tup)	((*(state)->copytup) (state, tup))
 #define WRITETUP(state,tup) ((*(state)->writetup) (state, tup))
 #define READTUP(state,len)	((*(state)->readtup) (state, len))
+#define LENTUP(state,eofOK)	((*(state)->lentup) (state, eofOK))
 #define LACKMEM(state)		((state)->availMem < 0)
 #define USEMEM(state,amt)	((state)->availMem -= (amt))
 #define FREEMEM(state,amt)	((state)->availMem += (amt))
@@ -193,9 +204,9 @@ struct Tuplestorestate
  *
  * NOTES about on-tape representation of tuples:
  *
- * We require the first "unsigned int" of a stored tuple to be the total size
- * on-tape of the tuple, including itself (so it is never zero).
- * The remainder of the stored tuple
+ * For tuples, the first "unsigned int" of a stored tuple holds the total
+ * size on-tape of the tuple, including itself (so it is never zero).
+ * The remainder of the stored tuple
  * may or may not match the in-memory representation of the tuple ---
  * any conversion needed is the job of the writetup and readtup routines.
  *
@@ -206,10 +217,13 @@ struct Tuplestorestate
  * state->backward is not set, the write/read routines may omit the extra
  * length word.
  *
+ * For Datums of constant length, both "unsigned int" length words are omitted.
+ *
  * writetup is expected to write both length words as well as the tuple
  * data.  When readtup is called, the tape is positioned just after the
- * front length word; readtup must read the tuple data and advance past
- * the back length word (if present).
+ * front length word (unless it is omitted, as for constant-size Datums);
+ * readtup must read the tuple data and advance past the back length word
+ * (if present).
  *
  * The write/read routines can make use of the tuple description data
  * stored in the Tuplestorestate record, if needed. They are also expected
@@ -241,11 +255,16 @@ static Tuplestorestate *tuplestore_begin_common(int eflags,
 static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple);
 static void dumptuples(Tuplestorestate *state);
 static void tuplestore_updatemax(Tuplestorestate *state);
-static unsigned int getlen(Tuplestorestate *state, bool eofOK);
+
+static unsigned int lentup_heap(Tuplestorestate *state, bool eofOK);
 static void *copytup_heap(Tuplestorestate *state, void *tup);
 static void writetup_heap(Tuplestorestate *state, void *tup);
 static void *readtup_heap(Tuplestorestate *state, unsigned int len);
 
+static unsigned int lentup_datum(Tuplestorestate *state, bool eofOK);
+static void *copytup_datum(Tuplestorestate *state, void *datum);
+static void writetup_datum(Tuplestorestate *state, void *datum);
+static void *readtup_datum(Tuplestorestate *state, unsigned int len);
 
 /*
  *		tuplestore_begin_xxx
@@ -268,6 +287,12 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
 	state->allowedMem = maxKBytes * (int64) 1024;
 	state->availMem = state->allowedMem;
 	state->myfile = NULL;
+	/*
+	 * Set Datum related data to invalid by default.
+	 */
+	state->datumType = InvalidOid;
+	state->datumTypeLen =  0;
+	state->datumTypeByVal = false;
 
 	/*
 	 * The palloc/pfree pattern for tuple memory is in a FIFO pattern.  A
@@ -345,6 +370,36 @@ tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes)
 	state->copytup = copytup_heap;
 	state->writetup = writetup_heap;
 	state->readtup = readtup_heap;
+	state->lentup = lentup_heap;
+
+	return state;
+}
+
+/*
+ * Like tuplestore_begin_heap, but the store holds single Datums of datumType.
+ */
+Tuplestorestate *
+tuplestore_begin_datum(Oid datumType, bool randomAccess, bool interXact, int maxKBytes)
+{
+	Tuplestorestate *state;
+	int			eflags = EXEC_FLAG_REWIND;
+
+	/*
+	 * randomAccess additionally requests backward-scan capability; this
+	 * reading of the flag is compatible with pre-8.3 tuplestores.
+	 */
+	if (randomAccess)
+		eflags |= EXEC_FLAG_BACKWARD;
+
+	state = tuplestore_begin_common(eflags, interXact, maxKBytes);
+	/* Remember the element type and look up its typlen/typbyval. */
+	state->datumType = datumType;
+	get_typlenbyval(datumType, &state->datumTypeLen, &state->datumTypeByVal);
+	/* Install the Datum-specific callbacks. */
+	state->lentup = lentup_datum;
+	state->copytup = copytup_datum;
+	state->writetup = writetup_datum;
+	state->readtup = readtup_datum;
 
 	return state;
 }
@@ -443,16 +498,19 @@ tuplestore_clear(Tuplestorestate *state)
 	{
 		int64		availMem = state->availMem;
 
-		/*
-		 * Below, we reset the memory context for storing tuples.  To save
-		 * from having to always call GetMemoryChunkSpace() on all stored
-		 * tuples, we adjust the availMem to forget all the tuples and just
-		 * recall USEMEM for the space used by the memtuples array.  Here we
-		 * just Assert that's correct and the memory tracking hasn't gone
-		 * wrong anywhere.
-		 */
-		for (i = state->memtupdeleted; i < state->memtupcount; i++)
-			availMem += GetMemoryChunkSpace(state->memtuples[i]);
+		if (!state->datumTypeByVal)
+		{
+			/*
+			 * Below, we reset the memory context for storing tuples.  To save
+			 * from having to always call GetMemoryChunkSpace() on all stored
+			 * tuples, we adjust the availMem to forget all the tuples and just
+			 * recall USEMEM for the space used by the memtuples array.  Here we
+			 * just Assert that's correct and the memory tracking hasn't gone
+			 * wrong anywhere.
+			 */
+			for (i = state->memtupdeleted; i < state->memtupcount; i++)
+				availMem += GetMemoryChunkSpace(state->memtuples[i]);
+		}
 
 		availMem += GetMemoryChunkSpace(state->memtuples);
 
@@ -776,6 +834,25 @@ tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple)
 	MemoryContextSwitchTo(oldcxt);
 }
 
+/*
+ * Append a single Datum to a store created by tuplestore_begin_datum.
+ */
+void
+tuplestore_putdatum(Tuplestorestate *state, Datum datum)
+{
+	MemoryContext oldcxt;
+	void	   *copy;
+
+	/*
+	 * The copy must be made in the store's context, even in the WRITEFILE
+	 * case.  COPYTUP does any needed USEMEM accounting itself.
+	 */
+	oldcxt = MemoryContextSwitchTo(state->context);
+	copy = COPYTUP(state, DatumGetPointer(datum));
+	tuplestore_puttuple_common(state, copy);
+	MemoryContextSwitchTo(oldcxt);
+}
+
 /*
  * Similar to tuplestore_puttuple(), but work from values + nulls arrays.
  * This avoids an extra tuple-construction operation.
@@ -1027,10 +1104,10 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
 			/* FALLTHROUGH */
 
 		case TSS_READFILE:
-			*should_free = true;
+			*should_free = !state->datumTypeByVal;
 			if (forward)
 			{
-				if ((tuplen = getlen(state, true)) != 0)
+				if ((tuplen = LENTUP(state, true)) != 0)
 				{
 					tup = READTUP(state, tuplen);
 					return tup;
@@ -1059,7 +1136,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
 				Assert(!state->truncated);
 				return NULL;
 			}
-			tuplen = getlen(state, false);
+			tuplen = LENTUP(state, false);
 
 			if (readptr->eof_reached)
 			{
@@ -1090,7 +1167,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
 					Assert(!state->truncated);
 					return NULL;
 				}
-				tuplen = getlen(state, false);
+				tuplen = LENTUP(state, false);
 			}
 
 			/*
@@ -1152,6 +1229,25 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
 	}
 }
 
+/*
+ * Fetch the next (or, if !forward, previous) Datum from the store into
+ * *result.  Returns false, with *result zeroed, when tuplestore_gettuple
+ * yields NULL.  NB: a by-value Datum equal to zero is stored as a NULL
+ * pointer and therefore is reported as end of data too.
+ */
+bool
+tuplestore_getdatum(Tuplestorestate *state, bool forward,
+					bool *should_free, Datum *result)
+{
+	void	   *item;
+
+	*should_free = false;		/* default if gettuple leaves it unset */
+	item = tuplestore_gettuple(state, forward, should_free);
+	*result = PointerGetDatum(item);
+
+	return item != NULL;
+}
+
 /*
  * tuplestore_advance - exported function to adjust position without fetching
  *
@@ -1460,8 +1556,11 @@ tuplestore_trim(Tuplestorestate *state)
 	/* Release no-longer-needed tuples */
 	for (i = state->memtupdeleted; i < nremove; i++)
 	{
-		FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
-		pfree(state->memtuples[i]);
+		if (!state->datumTypeByVal)
+		{
+			FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
+			pfree(state->memtuples[i]);
+		}
 		state->memtuples[i] = NULL;
 	}
 	state->memtupdeleted = nremove;
@@ -1556,25 +1655,6 @@ tuplestore_in_memory(Tuplestorestate *state)
 	return (state->status == TSS_INMEM);
 }
 
-
-/*
- * Tape interface routines
- */
-
-static unsigned int
-getlen(Tuplestorestate *state, bool eofOK)
-{
-	unsigned int len;
-	size_t		nbytes;
-
-	nbytes = BufFileReadMaybeEOF(state->myfile, &len, sizeof(len), eofOK);
-	if (nbytes == 0)
-		return 0;
-	else
-		return len;
-}
-
-
 /*
  * Routines specialized for HeapTuple case
  *
@@ -1585,6 +1665,19 @@ getlen(Tuplestorestate *state, bool eofOK)
  * to write that separately.
  */
 
+static unsigned int
+lentup_heap(Tuplestorestate *state, bool eofOK)
+{
+	unsigned int tuplen;
+
+	/*
+	 * Read the leading length word; a zero-byte read (EOF) yields length 0.
+	 */
+	if (BufFileReadMaybeEOF(state->myfile, &tuplen, sizeof(tuplen), eofOK) == 0)
+		return 0;
+	return tuplen;
+}
+
 static void *
 copytup_heap(Tuplestorestate *state, void *tup)
 {
@@ -1631,3 +1724,98 @@ readtup_heap(Tuplestorestate *state, unsigned int len)
 		BufFileReadExact(state->myfile, &tuplen, sizeof(tuplen));
 	return tuple;
 }
+
+/*
+ * Routines specialized for Datum case.
+ *
+ * Fixed-length Datums are stored without length words; variable-length
+ * ones carry a leading length word (plus a trailing one for backward scan).
+ * By-value Datums are kept inline in the "tuple" pointer, never palloc'd.
+ */
+
+/* Return the length to pass to readtup_datum, or 0 at EOF (if eofOK). */
+static unsigned int
+lentup_datum(Tuplestorestate *state, bool eofOK)
+{
+	unsigned int len;
+	char		probe;
+
+	Assert(state->datumType != InvalidOid);
+	/* Variable-length: the length word on tape tells us (0 at EOF). */
+	if (state->datumTypeLen <= 0)
+		return BufFileReadMaybeEOF(state->myfile, &len, sizeof(len), eofOK) ? len : 0;
+	/* Fixed-length: no length word, so probe one byte to detect EOF. */
+	if (BufFileReadMaybeEOF(state->myfile, &probe, 1, eofOK) == 0)
+		return 0;
+	if (BufFileSeek(state->myfile, 0, -1, SEEK_CUR) != 0)
+		elog(ERROR, "could not seek in tuplestore temporary file");
+	return state->datumTypeLen;
+}
+
+static void *
+copytup_datum(Tuplestorestate *state, void *datum)
+{
+	void	   *copy;
+
+	Assert(state->datumType != InvalidOid);
+	/* By-value Datums travel inside the pointer itself: nothing to copy. */
+	if (state->datumTypeByVal)
+		return datum;
+	copy = DatumGetPointer(datumCopy(PointerGetDatum(datum), false, state->datumTypeLen));
+	USEMEM(state, GetMemoryChunkSpace(copy));
+	return copy;
+}
+
+/* Write one Datum to the tape, releasing the in-memory copy if by-reference. */
+static void
+writetup_datum(Tuplestorestate *state, void *datum)
+{
+	Assert(state->datumType != InvalidOid);
+	if (state->datumTypeByVal)
+	{
+		/* The value is the pointer itself; store it Datum-wide, endian-safe. */
+		Datum		value = PointerGetDatum(datum);
+
+		BufFileWrite(state->myfile, &value, sizeof(value));
+	}
+	else
+	{
+		Size		size = datumGetSize(PointerGetDatum(datum), false, state->datumTypeLen);
+		/* Length words count the data plus the leading word, as for tuples. */
+		unsigned int tuplen = (unsigned int) (size + sizeof(unsigned int));
+
+		if (state->datumTypeLen < 0)
+			BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
+		BufFileWrite(state->myfile, datum, size);
+		if (state->backward && state->datumTypeLen < 0) /* trailing length word */
+			BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
+		FREEMEM(state, GetMemoryChunkSpace(datum));
+		pfree(datum);
+	}
+}
+
+/* Read back one Datum; 'len' is what lentup_datum returned.  By-ref results are palloc'd. */
+static void *
+readtup_datum(Tuplestorestate *state, unsigned int len)
+{
+	Assert(state->datumType != InvalidOid);
+	if (state->datumTypeByVal)
+	{
+		Datum		value = PointerGetDatum(NULL);
+
+		Assert(len == state->datumTypeLen);
+		BufFileReadExact(state->myfile, &value, sizeof(value));
+		return DatumGetPointer(value);
+	}
+	else
+	{
+		bool		haslenword = (state->datumTypeLen < 0);
+		unsigned int datalen = haslenword ? len - sizeof(unsigned int) : len;
+		void	   *data = palloc(datalen);
+
+		BufFileReadExact(state->myfile, data, datalen);
+		if (state->backward && haslenword)	/* skip trailing length word */
+			BufFileReadExact(state->myfile, &len, sizeof(len));
+		return data;
+	}
+}
diff --git a/src/include/utils/tuplestore.h b/src/include/utils/tuplestore.h
index 865ba7b8265..0341c47b851 100644
--- a/src/include/utils/tuplestore.h
+++ b/src/include/utils/tuplestore.h
@@ -1,17 +1,18 @@
 /*-------------------------------------------------------------------------
  *
  * tuplestore.h
- *	  Generalized routines for temporary tuple storage.
+ *	  Generalized routines for temporary storage of tuples and Datums.
  *
- * This module handles temporary storage of tuples for purposes such
- * as Materialize nodes, hashjoin batch files, etc.  It is essentially
- * a dumbed-down version of tuplesort.c; it does no sorting of tuples
- * but can only store and regurgitate a sequence of tuples.  However,
- * because no sort is required, it is allowed to start reading the sequence
- * before it has all been written.  This is particularly useful for cursors,
- * because it allows random access within the already-scanned portion of
- * a query without having to process the underlying scan to completion.
- * Also, it is possible to support multiple independent read pointers.
+ * This module handles temporary storage of either tuples or single
+ * Datum values for purposes such as Materialize nodes, hashjoin batch
+ * files, etc. It is essentially a dumbed-down version of tuplesort.c;
+ * it does no sorting of tuples but can only store and regurgitate a sequence
+ * of tuples.  However, because no sort is required, it is allowed to start
+ * reading the sequence before it has all been written.
+ *
+ * This is particularly useful for cursors, because it allows random access
+ * within the already-scanned portion of a query without having to process
+ * the underlying scan to completion.
  *
  * A temporary file is used to handle the data if it exceeds the
  * space limit specified by the caller.
@@ -39,14 +40,13 @@
  */
 typedef struct Tuplestorestate Tuplestorestate;
 
-/*
- * Currently we only need to store MinimalTuples, but it would be easy
- * to support the same behavior for IndexTuples and/or bare Datums.
- */
-
 extern Tuplestorestate *tuplestore_begin_heap(bool randomAccess,
 											  bool interXact,
 											  int maxKBytes);
+extern Tuplestorestate *tuplestore_begin_datum(Oid datumType,
+											 bool randomAccess,
+											 bool interXact,
+											 int maxKBytes);
 
 extern void tuplestore_set_eflags(Tuplestorestate *state, int eflags);
 
@@ -55,6 +55,7 @@ extern void tuplestore_puttupleslot(Tuplestorestate *state,
 extern void tuplestore_puttuple(Tuplestorestate *state, HeapTuple tuple);
 extern void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc,
 								 const Datum *values, const bool *isnull);
+extern void tuplestore_putdatum(Tuplestorestate *state, Datum datum);
 
 extern int	tuplestore_alloc_read_pointer(Tuplestorestate *state, int eflags);
 
@@ -72,6 +73,8 @@ extern bool tuplestore_in_memory(Tuplestorestate *state);
 
 extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
 									bool copy, TupleTableSlot *slot);
+extern bool tuplestore_getdatum(Tuplestorestate *state, bool forward,
+								bool *should_free, Datum *result);
 
 extern bool tuplestore_advance(Tuplestorestate *state, bool forward);
 
-- 
2.48.1

