From 2bdcbc8bf3b73694fd0bbfbc1907a3197b1129f9 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Wed, 6 May 2026 11:44:26 -0400
Subject: [PATCH va1 2/2] Mega-WIP: Optimized out/send path for printtup

Discussion: https://postgr.es/m/877bpghevm.fsf@163.com
---
 src/include/nodes/miscnodes.h        | 12 +++++
 src/backend/access/common/printtup.c | 35 +++++++++++--
 src/backend/utils/adt/int.c          | 73 +++++++++++++++++++++++++---
 src/backend/utils/adt/varlena.c      | 40 ++++++++++++++-
 4 files changed, 148 insertions(+), 12 deletions(-)

diff --git a/src/include/nodes/miscnodes.h b/src/include/nodes/miscnodes.h
index ec833001ab0..b3c189a5c0c 100644
--- a/src/include/nodes/miscnodes.h
+++ b/src/include/nodes/miscnodes.h
@@ -54,4 +54,16 @@ typedef struct ErrorSaveContext
 	((escontext) != NULL && IsA(escontext, ErrorSaveContext) && \
 	 ((ErrorSaveContext *) (escontext))->error_occurred)
 
+
+/*
+ * Type optionally passed to input/receive/output/send functions that allows
+ * those functions to opt into more efficient ways of performing their work
+ * (mainly reducing allocations & copies).
+ */
+typedef struct InOutContext
+{
+	NodeTag		type;			/* node tag; T_InOutContext */
+	StringInfo	buf;			/* buffer the out/send functions append to */
+} InOutContext;
+
 #endif							/* MISCNODES_H */
diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c
index 6fa93a6798a..2e3eb8f56d3 100644
--- a/src/backend/access/common/printtup.c
+++ b/src/backend/access/common/printtup.c
@@ -63,6 +63,7 @@ typedef struct
 	int			nattrs;
 	PrinttupAttrInfo *myinfo;	/* Cached info about each attr */
 	StringInfoData buf;			/* output buffer (*not* in tmpcontext) */
+	InOutContext inout;			/* FunctionCallInfo->context data */
 	MemoryContext tmpcontext;	/* Memory context for per-row workspace */
 } DR_printtup;
 
@@ -142,6 +143,9 @@ printtup_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
 								  FetchPortalTargetList(portal),
 								  portal->formats);
 
+	myState->inout.type = T_InOutContext;
+	myState->inout.buf = &myState->buf;
+
 	/* ----------------
 	 * We could set up the derived attr info at this time, but we postpone it
 	 * until the first call of printtup, for 2 reasons:
@@ -297,6 +301,15 @@ printtup_prepare_info(DR_printtup *myState, TupleDesc typeinfo, int numAttrs)
 		/* both out and send funcs have one argument */
 		thisState->outstate = palloc0(SizeForFunctionCallInfo(1));
 		thisState->outstate->flinfo = &thisState->finfo;
+
+		/*
+		 * The idea here is that output functions can optionally use more
+		 * efficient paths if they see that the context is InOutContext, by
+		 * directly appending correctly formatted output into the output
+		 * buffer.
+		 */
+		thisState->outstate->context = (Node *) &myState->inout;
+		thisState->outstate->nargs = 1;
 	}
 }
 
@@ -369,7 +382,13 @@ printtup(TupleTableSlot *slot, DestReceiver *self)
 
 			outputstr = DatumGetCString(FunctionCallInvoke(thisState->outstate));
 			Assert(!thisState->outstate->isnull);
-			pq_sendcountedtext(buf, outputstr, strlen(outputstr));
+
+			/*
+			 * If outputstr == NULL, the output function directly appended a
+			 * correctly formatted message.
+			 */
+			if (outputstr)
+				pq_sendcountedtext(buf, outputstr, strlen(outputstr));
 		}
 		else
 		{
@@ -378,9 +397,17 @@ printtup(TupleTableSlot *slot, DestReceiver *self)
 
 			outputbytes = DatumGetByteaP(FunctionCallInvoke(thisState->outstate));
 			Assert(!thisState->outstate->isnull);
-			pq_sendint32(buf, VARSIZE(outputbytes) - VARHDRSZ);
-			pq_sendbytes(buf, VARDATA(outputbytes),
-						 VARSIZE(outputbytes) - VARHDRSZ);
+
+			/*
+			 * NULL means the send function already appended a length-prefixed
+			 * value. XXX: DatumGetByteaP() detoasts before this check; verify.
+			 */
+			if (outputbytes)
+			{
+				pq_sendint32(buf, VARSIZE(outputbytes) - VARHDRSZ);
+				pq_sendbytes(buf, VARDATA(outputbytes),
+							 VARSIZE(outputbytes) - VARHDRSZ);
+			}
 		}
 	}
 
diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
index 4c894a49d5d..1b7b5b5246c 100644
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -327,10 +327,54 @@ Datum
 int4out(PG_FUNCTION_ARGS)
 {
 	int32		arg1 = PG_GETARG_INT32(0);
-	char	   *result = (char *) palloc(12);	/* sign, 10 digits, '\0' */
+	int			maxlen = 12;	/* sign, 10 digits, '\0' */
 
-	pg_ltoa(arg1, result);
-	PG_RETURN_CSTRING(result);
+	if (fcinfo->context && IsA(fcinfo->context, InOutContext))
+	{
+		/*
+		 * Optimized path for output functions called as part of a larger
+		 * output.
+		 *
+		 * FIXME: A good chunk of this should obviously be in helper
+		 * functions.
+		 */
+		InOutContext *inout = castNode(InOutContext, fcinfo->context);
+		StringInfo	buf = inout->buf;
+		int			prev_buflen;
+		int			len;
+		uint32		len_net;
+
+		/* ensure room for the length word plus the longest possible string */
+		enlargeStringInfo(buf, sizeof(uint32) + maxlen);
+
+		/* reserve space for length, to be filled out later */
+		prev_buflen = buf->len;
+		buf->len += sizeof(uint32);
+
+		/*
+		 * Construct the string directly in the buffer.  We don't have to care
+		 * about encoding conversions, because we assume that every encoding
+		 * is a superset of ASCII (XXX: is that true for client encodings?).
+		 */
+		len = pg_ltoa(arg1, buf->data + buf->len);
+		buf->len += len;
+
+		/* update the previously reserved length */
+		len_net = pg_hton32(len);
+		memcpy(&buf->data[prev_buflen], &len_net, sizeof(uint32));
+
+		PG_RETURN_VOID();
+	}
+	else
+	{
+		/*
+		 * Fallback path called in any other context.
+		 */
+		char	   *result = (char *) palloc(maxlen);
+
+		pg_ltoa(arg1, result);
+		PG_RETURN_CSTRING(result);
+	}
 }
 
 /*
@@ -351,11 +395,26 @@ Datum
 int4send(PG_FUNCTION_ARGS)
 {
 	int32		arg1 = PG_GETARG_INT32(0);
-	StringInfoData buf;
 
-	pq_begintypsend(&buf);
-	pq_sendint32(&buf, arg1);
-	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+	if (fcinfo->context && IsA(fcinfo->context, InOutContext))
+	{
+		InOutContext *inout = castNode(InOutContext, fcinfo->context);
+
+		/* length of data */
+		pq_sendint32(inout->buf, 4);
+		/* data itself */
+		pq_sendint32(inout->buf, arg1);
+
+		PG_RETURN_VOID();
+	}
+	else
+	{
+		StringInfoData buf;
+
+		pq_begintypsend(&buf);
+		pq_sendint32(&buf, arg1);
+		PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+	}
 }
 
 
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index c0ff51bd2fc..09913bc01f5 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -290,7 +290,45 @@ textout(PG_FUNCTION_ARGS)
 {
 	Datum		txt = PG_GETARG_DATUM(0);
 
-	PG_RETURN_CSTRING(TextDatumGetCString(txt));
+	if (fcinfo->context && IsA(fcinfo->context, InOutContext))
+	{
+		StringInfo	buf = castNode(InOutContext, fcinfo->context)->buf;
+		text	   *tunpacked = pg_detoast_datum_packed(DatumGetPointer(txt));
+		int			len = VARSIZE_ANY_EXHDR(tunpacked);
+		char	   *data = VARDATA_ANY(tunpacked);
+		char	   *data_converted;
+		size_t		data_len;
+
+		/*
+		 * Convert text output to the client encoding.  For efficiency, this
+		 * should really happen directly into buf; for that we would have to
+		 * reserve space for the length first and fill it in after conversion.
+		 *
+		 * FIXME: Obviously we would need helpers for this too.  XXX: a
+		 * converted copy (data_converted != data) is never pfree'd here.
+		 */
+		data_converted = pg_server_to_client(data, len);
+
+		if (data == data_converted)
+			data_len = len;
+		else
+			data_len = strlen(data_converted);
+
+		/* length */
+		pq_sendint32(buf, data_len);
+
+		/* actual data */
+		appendBinaryStringInfoNT(buf, data_converted, data_len);
+
+		if (tunpacked != DatumGetPointer(txt))
+			pfree(tunpacked);
+
+		PG_RETURN_VOID();
+	}
+	else
+	{
+		PG_RETURN_CSTRING(TextDatumGetCString(txt));
+	}
 }
 
 /*
-- 
2.46.0.519.g2e7b89e038

