From ac6329aaa2e2045dab1804ad09506f8b39906167 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Tue, 7 Apr 2026 16:45:11 +0200
Subject: [PATCH v14 5/5] Add EXPLAIN (IO) instrumentation for TidRangeScan

Adds support for EXPLAIN (IO) instrumentation for TidRange scans. This
requires adding shared instrumentation for parallel scans, using the
separate DSM approach introduced by dd78e69cfc33.

Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Lukas Fittl <lukas@fittl.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/flat/a177a6dd-240b-455a-8f25-aca0b1c08c6e%40vondra.me
---
 src/backend/commands/explain.c          |  25 +++++
 src/backend/executor/execParallel.c     |  12 +++
 src/backend/executor/nodeTidrangescan.c | 133 ++++++++++++++++++++++--
 src/include/executor/instrument_node.h  |  18 ++++
 src/include/executor/nodeTidrangescan.h |   9 ++
 src/include/nodes/execnodes.h           |   1 +
 src/tools/pgindent/typedefs.list        |   2 +
 7 files changed, 192 insertions(+), 8 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index c1928713b52..f9d79879fda 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -2149,6 +2149,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				if (plan->qual)
 					show_instrumentation_count("Rows Removed by Filter", 1,
 											   planstate, es);
+				show_scan_io_usage((ScanState *) planstate, es);
 			}
 			break;
 		case T_ForeignScan:
@@ -4127,6 +4128,30 @@ show_scan_io_usage(ScanState *planstate, ExplainState *es)
 					}
 				}
 
+				break;
+			}
+		case T_TidRangeScan:
+			{
+				SharedTidRangeScanInstrumentation *sinstrument
+				= ((TidRangeScanState *) planstate)->trss_sinstrument;
+
+				if (sinstrument)
+				{
+					for (int i = 0; i < sinstrument->num_workers; ++i)
+					{
+						TidRangeScanInstrumentation *winstrument = &sinstrument->sinstrument[i];
+
+						AccumulateIOStats(&stats, &winstrument->stats.io);
+
+						if (!es->workers_state)
+							continue;
+
+						ExplainOpenWorker(i, es);
+						print_io_usage(es, &winstrument->stats.io);
+						ExplainCloseWorker(i, es);
+					}
+				}
+
 				break;
 			}
 		default:
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 9690f0938ae..81b87d82fab 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -291,6 +291,9 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e)
 			if (planstate->plan->parallel_aware)
 				ExecTidRangeScanEstimate((TidRangeScanState *) planstate,
 										 e->pcxt);
+			/* even when not parallel-aware, for EXPLAIN ANALYZE */
+			ExecTidRangeScanInstrumentEstimate((TidRangeScanState *) planstate,
+											   e->pcxt);
 			break;
 		case T_AppendState:
 			if (planstate->plan->parallel_aware)
@@ -536,6 +539,9 @@ ExecParallelInitializeDSM(PlanState *planstate,
 			if (planstate->plan->parallel_aware)
 				ExecTidRangeScanInitializeDSM((TidRangeScanState *) planstate,
 											  d->pcxt);
+			/* even when not parallel-aware, for EXPLAIN ANALYZE */
+			ExecTidRangeScanInstrumentInitDSM((TidRangeScanState *) planstate,
+											  d->pcxt);
 			break;
 		case T_AppendState:
 			if (planstate->plan->parallel_aware)
@@ -1157,6 +1163,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
 		case T_SeqScanState:
 			ExecSeqScanRetrieveInstrumentation((SeqScanState *) planstate);
 			break;
+		case T_TidRangeScanState:
+			ExecTidRangeScanRetrieveInstrumentation((TidRangeScanState *) planstate);
+			break;
 		default:
 			break;
 	}
@@ -1430,6 +1439,9 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt)
 			if (planstate->plan->parallel_aware)
 				ExecTidRangeScanInitializeWorker((TidRangeScanState *) planstate,
 												 pwcxt);
+			/* even when not parallel-aware, for EXPLAIN ANALYZE */
+			ExecTidRangeScanInstrumentInitWorker((TidRangeScanState *) planstate,
+												 pwcxt);
 			break;
 		case T_AppendState:
 			if (planstate->plan->parallel_aware)
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index 4a8fe91b2b3..b387ed6c308 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -18,7 +18,9 @@
 #include "access/sysattr.h"
 #include "access/tableam.h"
 #include "catalog/pg_operator.h"
+#include "executor/execParallel.h"
 #include "executor/executor.h"
+#include "executor/instrument.h"
 #include "executor/nodeTidrangescan.h"
 #include "nodes/nodeFuncs.h"
 #include "utils/rel.h"
@@ -242,12 +244,19 @@ TidRangeNext(TidRangeScanState *node)
 
 		if (scandesc == NULL)
 		{
+			uint32		flags = SO_NONE;
+
+			if (ScanRelIsReadOnly(&node->ss))
+				flags |= SO_HINT_REL_READ_ONLY;
+
+			if (estate->es_instrument & INSTRUMENT_IO)
+				flags |= SO_SCAN_INSTRUMENT;
+
 			scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation,
 												estate->es_snapshot,
 												&node->trss_mintid,
 												&node->trss_maxtid,
-												ScanRelIsReadOnly(&node->ss) ?
-												SO_HINT_REL_READ_ONLY : SO_NONE);
+												flags);
 			node->ss.ss_currentScanDesc = scandesc;
 		}
 		else
@@ -342,6 +351,20 @@ ExecEndTidRangeScan(TidRangeScanState *node)
 {
 	TableScanDesc scan = node->ss.ss_currentScanDesc;
 
+	/* Collect IO stats for this process into shared instrumentation */
+	if (node->trss_sinstrument != NULL && IsParallelWorker())
+	{
+		TidRangeScanInstrumentation *si;
+
+		Assert(ParallelWorkerNumber < node->trss_sinstrument->num_workers);
+		si = &node->trss_sinstrument->sinstrument[ParallelWorkerNumber];
+
+		if (scan && scan->rs_instrument)
+		{
+			AccumulateIOStats(&si->stats.io, &scan->rs_instrument->io);
+		}
+	}
+
 	if (scan != NULL)
 		table_endscan(scan);
 }
@@ -454,6 +477,13 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
 {
 	EState	   *estate = node->ss.ps.state;
 	ParallelTableScanDesc pscan;
+	uint32		flags = SO_NONE;
+
+	if (ScanRelIsReadOnly(&node->ss))
+		flags |= SO_HINT_REL_READ_ONLY;
+
+	if (estate->es_instrument & INSTRUMENT_IO)
+		flags |= SO_SCAN_INSTRUMENT;
 
 	pscan = shm_toc_allocate(pcxt->toc, node->trss_pscanlen);
 	table_parallelscan_initialize(node->ss.ss_currentRelation,
@@ -462,9 +492,7 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
 	node->ss.ss_currentScanDesc =
 		table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
-										  pscan,
-										  ScanRelIsReadOnly(&node->ss) ?
-										  SO_HINT_REL_READ_ONLY : SO_NONE);
+										  pscan, flags);
 }
 
 /* ----------------------------------------------------------------
@@ -494,11 +522,100 @@ ExecTidRangeScanInitializeWorker(TidRangeScanState *node,
 								 ParallelWorkerContext *pwcxt)
 {
 	ParallelTableScanDesc pscan;
+	uint32		flags = SO_NONE;
+
+	if (ScanRelIsReadOnly(&node->ss))
+		flags |= SO_HINT_REL_READ_ONLY;
+
+	if (node->ss.ps.state->es_instrument & INSTRUMENT_IO)
+		flags |= SO_SCAN_INSTRUMENT;
 
 	pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
 	node->ss.ss_currentScanDesc =
 		table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
-										  pscan,
-										  ScanRelIsReadOnly(&node->ss) ?
-										  SO_HINT_REL_READ_ONLY : SO_NONE);
+										  pscan, flags);
+}
+
+/*
+ * Compute the amount of space we'll need for the shared instrumentation and
+ * inform pcxt->estimator.
+ */
+void
+ExecTidRangeScanInstrumentEstimate(TidRangeScanState *node,
+								   ParallelContext *pcxt)
+{
+	EState	   *estate = node->ss.ps.state;
+	Size		size;
+
+	if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0)
+		return;
+
+	size = add_size(offsetof(SharedTidRangeScanInstrumentation, sinstrument),
+					mul_size(pcxt->nworkers, sizeof(TidRangeScanInstrumentation)));
+
+	shm_toc_estimate_chunk(&pcxt->estimator, size);
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
+
+/*
+ * Set up parallel scan instrumentation.
+ */
+void
+ExecTidRangeScanInstrumentInitDSM(TidRangeScanState *node,
+								  ParallelContext *pcxt)
+{
+	EState	   *estate = node->ss.ps.state;
+	SharedTidRangeScanInstrumentation *sinstrument;
+	Size		size;
+
+	if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0)
+		return;
+
+	size = add_size(offsetof(SharedTidRangeScanInstrumentation, sinstrument),
+					mul_size(pcxt->nworkers, sizeof(TidRangeScanInstrumentation)));
+	sinstrument = shm_toc_allocate(pcxt->toc, size);
+	memset(sinstrument, 0, size);
+	sinstrument->num_workers = pcxt->nworkers;
+	shm_toc_insert(pcxt->toc,
+				   node->ss.ps.plan->plan_node_id +
+				   PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
+				   sinstrument);
+	node->trss_sinstrument = sinstrument;
+}
+
+/*
+ * Look up and save the location of the shared instrumentation.
+ */
+void
+ExecTidRangeScanInstrumentInitWorker(TidRangeScanState *node,
+									 ParallelWorkerContext *pwcxt)
+{
+	EState	   *estate = node->ss.ps.state;
+
+	if ((estate->es_instrument & INSTRUMENT_IO) == 0)
+		return;
+
+	node->trss_sinstrument = shm_toc_lookup(pwcxt->toc,
+											node->ss.ps.plan->plan_node_id +
+											PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
+											false);
+}
+
+/*
+ * Transfer scan instrumentation from DSM to private memory.
+ */
+void
+ExecTidRangeScanRetrieveInstrumentation(TidRangeScanState *node)
+{
+	SharedTidRangeScanInstrumentation *sinstrument = node->trss_sinstrument;
+	Size		size;
+
+	if (sinstrument == NULL)
+		return;
+
+	size = offsetof(SharedTidRangeScanInstrumentation, sinstrument)
+		+ sinstrument->num_workers * sizeof(TidRangeScanInstrumentation);
+
+	node->trss_sinstrument = palloc(size);
+	memcpy(node->trss_sinstrument, sinstrument, size);
 }
diff --git a/src/include/executor/instrument_node.h b/src/include/executor/instrument_node.h
index 003dc262b5d..4076990408e 100644
--- a/src/include/executor/instrument_node.h
+++ b/src/include/executor/instrument_node.h
@@ -285,4 +285,22 @@ typedef struct SharedSeqScanInstrumentation
 	SeqScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
 } SharedSeqScanInstrumentation;
 
+
+/*
+ *	Instrumentation information for TID range scans
+ */
+typedef struct TidRangeScanInstrumentation
+{
+	TableScanInstrumentation stats;
+} TidRangeScanInstrumentation;
+
+/*
+ * Shared memory container for per-worker information
+ */
+typedef struct SharedTidRangeScanInstrumentation
+{
+	int			num_workers;
+	TidRangeScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
+} SharedTidRangeScanInstrumentation;
+
 #endif							/* INSTRUMENT_NODE_H */
diff --git a/src/include/executor/nodeTidrangescan.h b/src/include/executor/nodeTidrangescan.h
index 8752d1ea8c4..9e7d0a357bb 100644
--- a/src/include/executor/nodeTidrangescan.h
+++ b/src/include/executor/nodeTidrangescan.h
@@ -28,4 +28,13 @@ extern void ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelConte
 extern void ExecTidRangeScanReInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt);
 extern void ExecTidRangeScanInitializeWorker(TidRangeScanState *node, ParallelWorkerContext *pwcxt);
 
+/* instrument support */
+extern void ExecTidRangeScanInstrumentEstimate(TidRangeScanState *node,
+											   ParallelContext *pcxt);
+extern void ExecTidRangeScanInstrumentInitDSM(TidRangeScanState *node,
+											  ParallelContext *pcxt);
+extern void ExecTidRangeScanInstrumentInitWorker(TidRangeScanState *node,
+												 ParallelWorkerContext *pwcxt);
+extern void ExecTidRangeScanRetrieveInstrumentation(TidRangeScanState *node);
+
 #endif							/* NODETIDRANGESCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 56febb3204c..13359180d25 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1922,6 +1922,7 @@ typedef struct TidRangeScanState
 	ItemPointerData trss_maxtid;
 	bool		trss_inScan;
 	Size		trss_pscanlen;
+	struct SharedTidRangeScanInstrumentation *trss_sinstrument;
 } TidRangeScanState;
 
 /* ----------------
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index c0436a13ac3..0cd41106975 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2867,6 +2867,7 @@ SharedRecordTableKey
 SharedRecordTypmodRegistry
 SharedSeqScanInstrumentation
 SharedSortInfo
+SharedTidRangeScanInstrumentation
 SharedTuplestore
 SharedTuplestoreAccessor
 SharedTuplestoreChunk
@@ -3171,6 +3172,7 @@ TidOpExpr
 TidPath
 TidRangePath
 TidRangeScan
+TidRangeScanInstrumentation
 TidRangeScanState
 TidScan
 TidScanState
-- 
2.53.0

