From 2c4376dfa6e4687f0a7ac0869938665f22230c0e Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@vondra.me>
Date: Wed, 18 Mar 2026 20:55:00 +0100
Subject: [PATCH v7 5/6] show prefetch stats for TidRangeScan

- enable show_scan_io_usage for TidRangeScan

- add infrastructure to allocate/collect instrumentation from parallel
  workers
---
 src/backend/commands/explain.c          |  32 ++++++
 src/backend/executor/execParallel.c     |   3 +
 src/backend/executor/nodeTidrangescan.c | 142 +++++++++++++++++++++---
 src/include/executor/instrument_node.h  |  11 ++
 src/include/executor/nodeTidrangescan.h |   1 +
 src/include/nodes/execnodes.h           |   1 +
 6 files changed, 177 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 20c5d720516..99b64d87126 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -2151,6 +2151,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				if (plan->qual)
 					show_instrumentation_count("Rows Removed by Filter", 1,
 											   planstate, es);
+				show_scan_io_usage((ScanState *) planstate, es);
 			}
 			break;
 		case T_ForeignScan:
@@ -4112,6 +4113,27 @@ show_scan_io_usage(ScanState *planstate, ExplainState *es)
 					}
 				}
 
+				break;
+			}
+		case T_TidRangeScan:
+			{
+				SharedTidRangeScanInstrumentation *sinstrument
+					= ((TidRangeScanState *) planstate)->trss_sinstrument;
+
+				/* collect prefetch statistics from the read stream */
+				stats = planstate->ss_currentScanDesc->rs_instrument->io;
+
+				/* get the sum of the counters set within each and every process */
+				if (sinstrument)
+				{
+					for (int i = 0; i < sinstrument->num_workers; ++i)
+					{
+						TidRangeScanInstrumentation *winstrument = &sinstrument->sinstrument[i];
+
+						AccumulateIOStats(&stats, &winstrument->stats.io);
+					}
+				}
+
 				break;
 			}
 		default:
@@ -4164,6 +4186,16 @@ show_worker_io_usage(PlanState *planstate, ExplainState *es, int worker)
 				instrument = &sinstrument->sinstrument[worker];
 				stats = &instrument->stats.io;
 
+				break;
+			}
+		case T_TidRangeScan:
+			{
+				TidRangeScanState *state = ((TidRangeScanState *) planstate);
+				SharedTidRangeScanInstrumentation *sinstrument = state->trss_sinstrument;
+				TidRangeScanInstrumentation *instrument = &sinstrument->sinstrument[worker];
+
+				stats = &instrument->stats.io;
+
 				break;
 			}
 		default:
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index c6f7ee292dd..3bb5b0d1cd2 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -1128,6 +1128,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
 		case T_SeqScanState:
 			ExecSeqScanRetrieveInstrumentation((SeqScanState *) planstate);
 			break;
+		case T_TidRangeScanState:
+			ExecTidRangeScanRetrieveInstrumentation((TidRangeScanState *) planstate);
+			break;
 		default:
 			break;
 	}
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index 4a8fe91b2b3..9b6e2b91183 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -242,12 +242,19 @@ TidRangeNext(TidRangeScanState *node)
 
 		if (scandesc == NULL)
 		{
+			uint32	flags = SO_NONE;
+
+			if (ScanRelIsReadOnly(&node->ss))
+				flags |= SO_HINT_REL_READ_ONLY;
+
+			if (estate->es_instrument)
+				flags |= SO_SCAN_INSTRUMENT;
+
 			scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation,
 												estate->es_snapshot,
 												&node->trss_mintid,
 												&node->trss_maxtid,
-												ScanRelIsReadOnly(&node->ss) ?
-												SO_HINT_REL_READ_ONLY : SO_NONE);
+												flags);
 			node->ss.ss_currentScanDesc = scandesc;
 		}
 		else
@@ -342,6 +349,34 @@ ExecEndTidRangeScan(TidRangeScanState *node)
 {
 	TableScanDesc scan = node->ss.ss_currentScanDesc;
 
+	/*
+	 * When ending a parallel worker, copy the statistics gathered by the
+	 * worker back into shared memory so that it can be picked up by the main
+	 * process to report in EXPLAIN ANALYZE.
+	 */
+	if (node->trss_sinstrument != NULL && IsParallelWorker())
+	{
+		TidRangeScanInstrumentation *si;
+
+		Assert(ParallelWorkerNumber < node->trss_sinstrument->num_workers);
+		si = &node->trss_sinstrument->sinstrument[ParallelWorkerNumber];
+
+		/*
+		 * Here we accumulate the stats rather than performing memcpy on
+		 * node->stats into si.  When a Gather/GatherMerge node finishes it
+		 * will perform planner shutdown on the workers.  On rescan it will
+		 * spin up new workers which will have a new state and zeroed stats.
+		 */
+
+		/* collect prefetch info for this process from the read_stream */
+		if (node->ss.ss_currentScanDesc &&
+			node->ss.ss_currentScanDesc->rs_instrument)
+		{
+			AccumulateIOStats(&si->stats.io,
+							  &node->ss.ss_currentScanDesc->rs_instrument->io);
+		}
+	}
+
 	if (scan != NULL)
 		table_endscan(scan);
 }
@@ -435,11 +470,23 @@ void
 ExecTidRangeScanEstimate(TidRangeScanState *node, ParallelContext *pcxt)
 {
 	EState	   *estate = node->ss.ps.state;
+	Size		size;
+
+	size = table_parallelscan_estimate(node->ss.ss_currentRelation,
+									   estate->es_snapshot);
+	node->trss_pscanlen = size;
+
+	/* make sure the instrumentation is properly aligned */
+	size = MAXALIGN(size);
+
+	/* account for instrumentation, if required */
+	if (node->ss.ps.instrument && pcxt->nworkers > 0)
+	{
+		size = add_size(size, offsetof(SharedTidRangeScanInstrumentation, sinstrument));
+		size = add_size(size, mul_size(pcxt->nworkers, sizeof(TidRangeScanInstrumentation)));
+	}
 
-	node->trss_pscanlen =
-		table_parallelscan_estimate(node->ss.ss_currentRelation,
-									estate->es_snapshot);
-	shm_toc_estimate_chunk(&pcxt->estimator, node->trss_pscanlen);
+	shm_toc_estimate_chunk(&pcxt->estimator, size);
 	shm_toc_estimate_keys(&pcxt->estimator, 1);
 }
 
@@ -454,17 +501,50 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
 {
 	EState	   *estate = node->ss.ps.state;
 	ParallelTableScanDesc pscan;
+	SharedTidRangeScanInstrumentation *sinstrument = NULL;
+	Size	size;
+	char   *ptr;
+	uint32	flags = SO_NONE;
+
+	if (ScanRelIsReadOnly(&node->ss))
+		flags |= SO_HINT_REL_READ_ONLY;
+
+	if (estate->es_instrument)
+		flags |= SO_SCAN_INSTRUMENT;
+
+	size = MAXALIGN(node->trss_pscanlen);
+	if (node->ss.ps.instrument && pcxt->nworkers > 0)
+	{
+		size = add_size(size, offsetof(SharedTidRangeScanInstrumentation, sinstrument));
+		size = add_size(size, mul_size(pcxt->nworkers, sizeof(TidRangeScanInstrumentation)));
+	}
 
-	pscan = shm_toc_allocate(pcxt->toc, node->trss_pscanlen);
+	pscan = shm_toc_allocate(pcxt->toc, size);
+	pscan->phs_len = node->trss_pscanlen;
 	table_parallelscan_initialize(node->ss.ss_currentRelation,
 								  pscan,
 								  estate->es_snapshot);
 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
 	node->ss.ss_currentScanDesc =
 		table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
-										  pscan,
-										  ScanRelIsReadOnly(&node->ss) ?
-										  SO_HINT_REL_READ_ONLY : SO_NONE);
+										  pscan, flags);
+
+	/* initialize the shared instrumentation (with correct alignment) */
+	ptr = (char *) pscan;
+	ptr += MAXALIGN(node->trss_pscanlen);
+	if (node->ss.ps.instrument && pcxt->nworkers > 0)
+		sinstrument = (SharedTidRangeScanInstrumentation *) ptr;
+
+	if (sinstrument)
+	{
+		sinstrument->num_workers = pcxt->nworkers;
+
+		/* ensure any unfilled slots will contain zeroes */
+		memset(sinstrument->sinstrument, 0,
+			   pcxt->nworkers * sizeof(TidRangeScanInstrumentation));
+	}
+
+	node->trss_sinstrument = sinstrument;
 }
 
 /* ----------------------------------------------------------------
@@ -493,12 +573,48 @@ void
 ExecTidRangeScanInitializeWorker(TidRangeScanState *node,
 								 ParallelWorkerContext *pwcxt)
 {
+	EState	   *estate = node->ss.ps.state;
 	ParallelTableScanDesc pscan;
+	char	   *ptr;
+	uint32		flags = SO_NONE;
+
+	if (ScanRelIsReadOnly(&node->ss))
+		flags |= SO_HINT_REL_READ_ONLY;
+
+	if (estate->es_instrument)
+		flags |= SO_SCAN_INSTRUMENT;
 
 	pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
 	node->ss.ss_currentScanDesc =
 		table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
-										  pscan,
-										  ScanRelIsReadOnly(&node->ss) ?
-										  SO_HINT_REL_READ_ONLY : SO_NONE);
+										  pscan, flags);
+
+	/* set pointer to the shared instrumentation */
+	ptr = (char *) pscan;
+	ptr += MAXALIGN(pscan->phs_len);
+
+	if (node->ss.ps.instrument)
+		node->trss_sinstrument = (SharedTidRangeScanInstrumentation *) ptr;
+}
+
+/* ----------------------------------------------------------------
+ *		ExecTidRangeScanRetrieveInstrumentation
+ *
+ *		Transfer scan statistics from DSM to private memory.
+ * ----------------------------------------------------------------
+ */
+void
+ExecTidRangeScanRetrieveInstrumentation(TidRangeScanState *node)
+{
+	SharedTidRangeScanInstrumentation *sinstrument = node->trss_sinstrument;
+	Size		size;
+
+	if (sinstrument == NULL)
+		return;
+
+	size = offsetof(SharedTidRangeScanInstrumentation, sinstrument)
+		+ sinstrument->num_workers * sizeof(TidRangeScanInstrumentation);
+
+	node->trss_sinstrument = palloc(size);
+	memcpy(node->trss_sinstrument, sinstrument, size);
 }
diff --git a/src/include/executor/instrument_node.h b/src/include/executor/instrument_node.h
index dba3ad7f599..9102404d155 100644
--- a/src/include/executor/instrument_node.h
+++ b/src/include/executor/instrument_node.h
@@ -106,6 +106,17 @@ typedef struct SharedSeqScanInstrumentation
 	SeqScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
 } SharedSeqScanInstrumentation;
 
+typedef struct TidRangeScanInstrumentation
+{
+	TableScanInstrumentation		stats;
+} TidRangeScanInstrumentation;
+
+typedef struct SharedTidRangeScanInstrumentation
+{
+	int			num_workers;
+	TidRangeScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
+} SharedTidRangeScanInstrumentation;
+
 typedef struct IndexScanInstrumentation
 {
 	/* Index search count (incremented with pgstat_count_index_scan call) */
diff --git a/src/include/executor/nodeTidrangescan.h b/src/include/executor/nodeTidrangescan.h
index 8752d1ea8c4..4f5dd74e0fc 100644
--- a/src/include/executor/nodeTidrangescan.h
+++ b/src/include/executor/nodeTidrangescan.h
@@ -27,5 +27,6 @@ extern void ExecTidRangeScanEstimate(TidRangeScanState *node, ParallelContext *p
 extern void ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt);
 extern void ExecTidRangeScanReInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt);
 extern void ExecTidRangeScanInitializeWorker(TidRangeScanState *node, ParallelWorkerContext *pwcxt);
+extern void ExecTidRangeScanRetrieveInstrumentation(TidRangeScanState *node);
 
 #endif							/* NODETIDRANGESCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e1c8a3926ac..b19bc5c6005 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1918,6 +1918,7 @@ typedef struct TidRangeScanState
 	ItemPointerData trss_maxtid;
 	bool		trss_inScan;
 	Size		trss_pscanlen;
+	SharedTidRangeScanInstrumentation *trss_sinstrument;
 } TidRangeScanState;
 
 /* ----------------
-- 
2.53.0

