From e611303cc656e5b12dacf9b9e7e1c8421916dc4a Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@vondra.me>
Date: Tue, 31 Mar 2026 15:42:32 +0200
Subject: [PATCH v7 2/6] explain: show prefetch stats in EXPLAIN (ANALYZE)

This adds details about AIO / prefetch for executor nodes using a
ReadStream. As of this commit this applies only to BitmapHeapScan,
because that's the only scan node using a ReadStream and collecting
instrumentation from workers. Support for SeqScan and TidRangeScan,
the other scan nodes using ReadStream, will be added in subsequent
commits.

The stats are collected only when required by EXPLAIN ANALYZE, with the
IO option (enabled by default). This mimics what we do for BUFFERS,
which seems like the closest EXPLAIN option. The amount of collected
statistics is very limited, but it can be disabled if needed.

The IOStats struct is stored in the scan descriptor as a field, next to
other fields used by table AMs. A pointer to the field is passed to the
ReadStream, and updated directly.

It's the responsibility of the table AM to allocate the struct (e.g. in
ambeginscan) whenever the SO_SCAN_INSTRUMENT flag is specified, so that
the executor and ReadStream have access to it.

The collected stats are designed for ReadStream, but are meant to be
reasonably generic in case a TAM manages I/Os in different ways.
---
 doc/src/sgml/ref/explain.sgml                 |  12 ++
 src/backend/access/heap/heapam.c              |   9 +
 src/backend/commands/explain.c                | 163 +++++++++++++++++-
 src/backend/commands/explain_state.c          |  15 ++
 src/backend/executor/nodeBitmapHeapscan.c     |  20 ++-
 src/backend/storage/aio/read_stream.c         |  90 +++++++++-
 src/include/access/relscan.h                  |   7 +
 src/include/access/tableam.h                  |   3 +
 src/include/commands/explain_state.h          |   1 +
 src/include/executor/instrument_node.h        |  50 ++++++
 src/include/storage/read_stream.h             |   2 +
 .../modules/index/expected/killtuples.out     |  44 ++---
 src/test/modules/index/specs/killtuples.spec  |   2 +-
 src/test/regress/expected/explain.out         |  16 +-
 .../regress/expected/incremental_sort.out     |   4 +-
 src/test/regress/expected/memoize.out         |   2 +-
 src/test/regress/expected/merge.out           |   2 +-
 src/test/regress/expected/partition_prune.out |   2 +-
 src/test/regress/expected/select_parallel.out |   6 +-
 src/test/regress/expected/subselect.out       |   2 +-
 src/test/regress/sql/explain.sql              |  16 +-
 src/test/regress/sql/incremental_sort.sql     |   4 +-
 src/test/regress/sql/memoize.sql              |   2 +-
 src/test/regress/sql/merge.sql                |   2 +-
 src/test/regress/sql/partition_prune.sql      |   2 +-
 src/test/regress/sql/select_parallel.sql      |   6 +-
 src/test/regress/sql/subselect.sql            |   2 +-
 src/tools/pgindent/typedefs.list              |   2 +
 28 files changed, 426 insertions(+), 62 deletions(-)

diff --git a/doc/src/sgml/ref/explain.sgml b/doc/src/sgml/ref/explain.sgml
index 5b8b521802e..4951b624893 100644
--- a/doc/src/sgml/ref/explain.sgml
+++ b/doc/src/sgml/ref/explain.sgml
@@ -46,6 +46,7 @@ EXPLAIN [ ( <replaceable class="parameter">option</replaceable> [, ...] ) ] <rep
     TIMING [ <replaceable class="parameter">boolean</replaceable> ]
     SUMMARY [ <replaceable class="parameter">boolean</replaceable> ]
     MEMORY [ <replaceable class="parameter">boolean</replaceable> ]
+    IO [ <replaceable class="parameter">boolean</replaceable> ]
     FORMAT { TEXT | XML | JSON | YAML }
 </synopsis>
  </refsynopsisdiv>
@@ -298,6 +299,17 @@ ROLLBACK;
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>IO</literal></term>
+    <listitem>
+     <para>
+      Include information on I/O performed by each node.
+      This parameter may only be used when <literal>ANALYZE</literal> is also
+      enabled.  It defaults to <literal>TRUE</literal>.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>FORMAT</literal></term>
     <listitem>
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 6bff0032db2..015943f0bf7 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1199,6 +1199,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 	scan->rs_base.rs_nkeys = nkeys;
 	scan->rs_base.rs_flags = flags;
 	scan->rs_base.rs_parallel = parallel_scan;
+	scan->rs_base.rs_instrument = NULL;
 	scan->rs_strategy = NULL;	/* set in initscan */
 	scan->rs_cbuf = InvalidBuffer;
 
@@ -1311,6 +1312,14 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 														  sizeof(TBMIterateResult));
 	}
 
+	/* enable read stream instrumentation */
+	if (flags & SO_SCAN_INSTRUMENT)
+	{
+		scan->rs_base.rs_instrument = palloc0_object(TableScanInstrumentation);
+		read_stream_enable_stats(scan->rs_read_stream,
+								 &scan->rs_base.rs_instrument->io);
+	}
+
 	scan->rs_vmbuffer = InvalidBuffer;
 
 	return (TableScanDesc) scan;
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index e4b70166b0e..5e106baa088 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -13,6 +13,7 @@
  */
 #include "postgres.h"
 
+#include "access/relscan.h"
 #include "access/xact.h"
 #include "catalog/pg_type.h"
 #include "commands/createas.h"
@@ -139,6 +140,11 @@ static void show_hashagg_info(AggState *aggstate, ExplainState *es);
 static void show_indexsearches_info(PlanState *planstate, ExplainState *es);
 static void show_tidbitmap_info(BitmapHeapScanState *planstate,
 								ExplainState *es);
+static void show_scan_io_usage(ScanState *planstate,
+							   ExplainState *es);
+static void show_worker_io_usage(PlanState *planstate,
+								 ExplainState *es,
+								 int worker);
 static void show_instrumentation_count(const char *qlabel, int which,
 									   PlanState *planstate, ExplainState *es);
 static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
@@ -2009,6 +2015,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
 			show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
+			show_scan_io_usage((ScanState *) planstate, es);
 			break;
 		case T_SampleScan:
 			show_tablesample(((SampleScan *) plan)->tablesample,
@@ -2027,6 +2034,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 										   planstate, es);
 			if (IsA(plan, CteScan))
 				show_ctescan_info(castNode(CteScanState, planstate), es);
+			show_scan_io_usage((ScanState *) planstate, es);
 			break;
 		case T_Gather:
 			{
@@ -2297,8 +2305,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	if (es->wal && planstate->instrument)
 		show_wal_usage(es, &planstate->instrument->walusage);
 
-	/* Prepare per-worker buffer/WAL usage */
-	if (es->workers_state && (es->buffers || es->wal) && es->verbose)
+	/* Prepare per-worker buffer/WAL/IO usage */
+	if (es->workers_state && (es->buffers || es->wal || es->io) && es->verbose)
 	{
 		WorkerInstrumentation *w = planstate->worker_instrument;
 
@@ -2315,6 +2323,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				show_buffer_usage(es, &instrument->bufusage);
 			if (es->wal)
 				show_wal_usage(es, &instrument->walusage);
+			if (es->io)
+				show_worker_io_usage(planstate, es, n);
+
 			ExplainCloseWorker(n, es);
 		}
 	}
@@ -3985,6 +3996,154 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
 	}
 }
 
+static void
+print_io_usage(ExplainState *es, IOStats *stats)
+{
+	/* don't print stats if there's nothing to report */
+	if (stats->prefetch_count > 0)
+	{
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+		{
+			/* prefetch distance info */
+			ExplainIndentText(es);
+			appendStringInfo(es->str, "Prefetch: avg=%.2f max=%d capacity=%d\n",
+							 (stats->distance_sum * 1.0 / stats->prefetch_count),
+							 stats->distance_max,
+							 stats->distance_capacity);
+
+			/* prefetch I/O info (only if there were actual I/Os) */
+			if (stats->io_count > 0)
+			{
+				ExplainIndentText(es);
+				appendStringInfo(es->str, "I/O: count=%" PRIu64 " waits=%" PRIu64
+								 " size=%.2f inprogress=%.2f\n",
+								 stats->io_count, stats->wait_count,
+								 (stats->io_nblocks * 1.0 / stats->io_count),
+								 (stats->io_in_progress * 1.0 / stats->io_count));
+			}
+		}
+		else
+		{
+			ExplainPropertyFloat("Average Prefetch Distance", NULL,
+								 (stats->distance_sum * 1.0 / stats->prefetch_count), 3, es);
+			ExplainPropertyInteger("Max Prefetch Distance", NULL,
+								   stats->distance_max, es);
+			ExplainPropertyInteger("Prefetch Capacity", NULL,
+								   stats->distance_capacity, es);
+
+			ExplainPropertyUInteger("I/O Count", NULL,
+									stats->io_count, es);
+			ExplainPropertyUInteger("I/O Waits", NULL,
+									stats->wait_count, es);
+			ExplainPropertyFloat("Average I/O Size", NULL,
+								 (stats->io_nblocks * 1.0 / Max(1, stats->io_count)), 3, es);
+			ExplainPropertyFloat("Average I/Os In Progress", NULL,
+								 (stats->io_in_progress * 1.0 / Max(1, stats->io_count)), 3, es);
+		}
+	}
+}
+
+/*
+ * show_scan_io_usage
+ *		show prefetch / I/O statistics collected for a table scan node
+ *
+ * Prints totals for the leader and parallel workers; bitmap heap scans only.
+ */
+static void
+show_scan_io_usage(ScanState *planstate, ExplainState *es)
+{
+	Plan	   *plan = planstate->ps.plan;
+	TableScanDesc scan;
+	IOStats		stats;
+
+	if (!es->io)
+		return;
+
+	/* scan not started, or no instrumentation allocated for it */
+	scan = planstate->ss_currentScanDesc;
+	if (scan == NULL || scan->rs_instrument == NULL)
+		return;
+
+	/*
+	 * Initialize counters with stats from the local process first, then
+	 * accumulate data from parallel workers.
+	 */
+	switch (nodeTag(plan))
+	{
+		case T_BitmapHeapScan:
+			{
+				SharedBitmapHeapInstrumentation *sinstrument
+				= ((BitmapHeapScanState *) planstate)->sinstrument;
+
+				/* start from a copy of this process's own counters */
+				stats = scan->rs_instrument->io;
+
+				/*
+				 * add the counters each worker stored in the shared
+				 * instrumentation array (if there is one)
+				 */
+				if (sinstrument)
+				{
+					for (int i = 0; i < sinstrument->num_workers; ++i)
+					{
+						BitmapHeapScanInstrumentation *winstrument = &sinstrument->sinstrument[i];
+
+						AccumulateIOStats(&stats, &winstrument->stats.io);
+					}
+				}
+
+				break;
+			}
+		default:
+			/* ignore other plans */
+			return;
+	}
+
+	print_io_usage(es, &stats);
+}
+
+/*
+ * show_worker_io_usage
+ *		show info about I/O prefetching for a single worker
+ *
+ * 'worker' indexes the node's shared per-worker instrumentation array.
+ */
+static void
+show_worker_io_usage(PlanState *planstate, ExplainState *es, int worker)
+{
+	Plan	   *plan = planstate->plan;
+	IOStats    *stats = NULL;
+
+	if (!es->io)
+		return;
+
+	/* get instrumentation for the given worker */
+	switch (nodeTag(plan))
+	{
+		case T_BitmapHeapScan:
+			{
+				BitmapHeapScanState *state = ((BitmapHeapScanState *) planstate);
+				SharedBitmapHeapInstrumentation *sinstrument = state->sinstrument;
+				BitmapHeapScanInstrumentation *instrument;
+
+				/* FIXME bitmap heap scans don't initialize instrumentation for
+				 * workers in non-parallel-aware part of the plan */
+				if (!sinstrument)
+					return;
+
+				instrument = &sinstrument->sinstrument[worker];
+				stats = &instrument->stats.io;
+
+				break;
+			}
+		default:
+			/* ignore other plans */
+			return;
+	}
+
+	print_io_usage(es, stats);
+}
+
 /*
  * If it's EXPLAIN ANALYZE, show instrumentation information for a plan node
  *
diff --git a/src/backend/commands/explain_state.c b/src/backend/commands/explain_state.c
index 77f59b8e500..3d5f92bfa91 100644
--- a/src/backend/commands/explain_state.c
+++ b/src/backend/commands/explain_state.c
@@ -79,6 +79,7 @@ ParseExplainOptionList(ExplainState *es, List *options, ParseState *pstate)
 	ListCell   *lc;
 	bool		timing_set = false;
 	bool		buffers_set = false;
+	bool		io_set = false;
 	bool		summary_set = false;
 
 	/* Parse options list. */
@@ -159,6 +160,11 @@ ParseExplainOptionList(ExplainState *es, List *options, ParseState *pstate)
 								"EXPLAIN", opt->defname, p),
 						 parser_errposition(pstate, opt->location)));
 		}
+		else if (strcmp(opt->defname, "io") == 0)
+		{
+			io_set = true;
+			es->io = defGetBoolean(opt);
+		}
 		else if (!ApplyExtensionExplainOption(es, opt, pstate))
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
@@ -179,12 +185,21 @@ ParseExplainOptionList(ExplainState *es, List *options, ParseState *pstate)
 	/* if the buffers was not set explicitly, set default value */
 	es->buffers = (buffers_set) ? es->buffers : es->analyze;
 
+	/* if the IO was not set explicitly, set default value */
+	es->io = (io_set) ? es->io : es->analyze;
+
 	/* check that timing is used with EXPLAIN ANALYZE */
 	if (es->timing && !es->analyze)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("EXPLAIN option %s requires ANALYZE", "TIMING")));
 
+	/* check that IO is used with EXPLAIN ANALYZE */
+	if (es->io && !es->analyze)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("EXPLAIN option %s requires ANALYZE", "IO")));
+
 	/* check that serialize is used with EXPLAIN ANALYZE */
 	if (es->serialize != EXPLAIN_SERIALIZE_NONE && !es->analyze)
 		ereport(ERROR,
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 73831aed451..dcd24d266eb 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -104,6 +104,7 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
 	TBMIterator tbmiterator = {0};
 	ParallelBitmapHeapState *pstate = node->pstate;
 	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
+	EState	   *estate = node->ss.ps.state;
 
 	if (!pstate)
 	{
@@ -144,13 +145,20 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
 	 */
 	if (!node->ss.ss_currentScanDesc)
 	{
+		uint32		flags = SO_NONE;
+
+		if (ScanRelIsReadOnly(&node->ss))
+			flags |= SO_HINT_REL_READ_ONLY;
+
+		if (estate->es_instrument)
+			flags |= SO_SCAN_INSTRUMENT;
+
 		node->ss.ss_currentScanDesc =
 			table_beginscan_bm(node->ss.ss_currentRelation,
 							   node->ss.ps.state->es_snapshot,
 							   0,
 							   NULL,
-							   ScanRelIsReadOnly(&node->ss) ?
-							   SO_HINT_REL_READ_ONLY : SO_NONE);
+							   flags);
 	}
 
 	node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;
@@ -330,6 +338,14 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 		 */
 		si->exact_pages += node->stats.exact_pages;
 		si->lossy_pages += node->stats.lossy_pages;
+
+		/* collect prefetch info for this process from the read_stream */
+		if (node->ss.ss_currentScanDesc &&
+			node->ss.ss_currentScanDesc->rs_instrument)
+		{
+			AccumulateIOStats(&si->stats.io,
+							  &node->ss.ss_currentScanDesc->rs_instrument->io);
+		}
 	}
 
 	/*
diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c
index 31f9e35dee3..fc301fd79b1 100644
--- a/src/backend/storage/aio/read_stream.c
+++ b/src/backend/storage/aio/read_stream.c
@@ -72,6 +72,7 @@
 #include "postgres.h"
 
 #include "miscadmin.h"
+#include "executor/instrument_node.h"
 #include "storage/aio.h"
 #include "storage/fd.h"
 #include "storage/smgr.h"
@@ -108,6 +109,9 @@ struct ReadStream
 	bool		advice_enabled;
 	bool		temporary;
 
+	/* scan stats counters */
+	IOStats    *stats;
+
 	/*
 	 * One-block buffer to support 'ungetting' a block number, to resolve flow
 	 * control problems when I/Os are split.
@@ -173,6 +177,73 @@ block_range_read_stream_cb(ReadStream *stream,
 	return InvalidBlockNumber;
 }
 
+/*
+ * Update stream stats with current pinned buffer depth.
+ *
+ * Called once per buffer returned to the consumer in read_stream_next_buffer().
+ * Records the number of pinned buffers at that moment, so we can compute the
+ * average look-ahead depth.
+ */
+static inline void
+read_stream_count_prefetch(ReadStream *stream)
+{
+	IOStats    *stats = stream->stats;
+
+	if (stats == NULL)
+		return;
+
+	stats->prefetch_count++;
+	stats->distance_sum += stream->pinned_buffers;
+	if (stream->pinned_buffers > stats->distance_max)
+		stats->distance_max = stream->pinned_buffers;
+}
+
+/*
+ * Update stream stats about size of I/O requests.
+ *
+ * We count the number of I/O requests, size of requests (counted in blocks)
+ * and number of in-progress I/Os.
+ */
+static inline void
+read_stream_count_io(ReadStream *stream, int nblocks, int in_progress)
+{
+	IOStats    *stats = stream->stats;
+
+	if (stats == NULL)
+		return;
+
+	stats->io_count++;
+	stats->io_nblocks += nblocks;
+	stats->io_in_progress += in_progress;
+}
+
+/*
+ * Update stream stats about waits for I/O when consuming buffers.
+ *
+ * We count the number of I/O waits while pulling buffers out of a stream.
+ */
+static inline void
+read_stream_count_wait(ReadStream *stream)
+{
+	IOStats    *stats = stream->stats;
+
+	if (stats == NULL)
+		return;
+
+	stats->wait_count++;
+}
+
+/*
+ * Enable collection of stats into the provided IOStats.
+ */
+void
+read_stream_enable_stats(ReadStream *stream, IOStats *stats)
+{
+	stream->stats = stats;
+	if (stream->stats)
+		stream->stats->distance_capacity = stream->max_pinned_buffers;
+}
+
 /*
  * Ask the callback which block it would like us to read next, with a one block
  * buffer in front to allow read_stream_unget_block() to work.
@@ -394,6 +465,9 @@ read_stream_start_pending_read(ReadStream *stream)
 		Assert(stream->ios_in_progress < stream->max_ios);
 		stream->ios_in_progress++;
 		stream->seq_blocknum = stream->pending_read_blocknum + nblocks;
+
+		/* update I/O stats */
+		read_stream_count_io(stream, nblocks, stream->ios_in_progress);
 	}
 
 	/*
@@ -881,6 +955,7 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
 										flags)))
 			{
 				/* Fast return. */
+				read_stream_count_prefetch(stream);
 				return buffer;
 			}
 
@@ -896,6 +971,12 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
 			 * to avoid having to effectively do another synchronous IO for
 			 * the next block (if it were also a miss).
 			 */
+
+			/* update I/O stats */
+			read_stream_count_io(stream, 1, stream->ios_in_progress);
+
+			/* update prefetch distance */
+			read_stream_count_prefetch(stream);
 		}
 		else
 		{
@@ -952,12 +1033,17 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
 	{
 		int16		io_index = stream->oldest_io_index;
 		int32		distance;	/* wider temporary value, clamped below */
+		bool		needed_wait;
 
 		/* Sanity check that we still agree on the buffers. */
 		Assert(stream->ios[io_index].op.buffers ==
 			   &stream->buffers[oldest_buffer_index]);
 
-		WaitReadBuffers(&stream->ios[io_index].op);
+		needed_wait = WaitReadBuffers(&stream->ios[io_index].op);
+
+		/* Count it as a wait if we need to wait for IO */
+		if (needed_wait)
+			read_stream_count_wait(stream);
 
 		Assert(stream->ios_in_progress > 0);
 		stream->ios_in_progress--;
@@ -1031,6 +1117,8 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
 	}
 #endif
 
+	read_stream_count_prefetch(stream);
+
 	/* Pin transferred to caller. */
 	Assert(stream->pinned_buffers > 0);
 	stream->pinned_buffers--;
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 960abf6c214..a9c334e633d 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -25,6 +25,8 @@
 
 struct ParallelTableScanDescData;
 
+struct TableScanInstrumentation;
+
 /*
  * Generic descriptor for table scans. This is the base-class for table scans,
  * which needs to be embedded in the scans of individual AMs.
@@ -64,6 +66,11 @@ typedef struct TableScanDescData
 
 	struct ParallelTableScanDescData *rs_parallel;	/* parallel scan
 													 * information */
+
+	/*
+	 * Instrumentation counters maintained by all table AMs.
+	 */
+	struct TableScanInstrumentation *rs_instrument;
 } TableScanDescData;
 typedef struct TableScanDescData *TableScanDesc;
 
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 4647785fd35..9e3d7186868 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -69,6 +69,9 @@ typedef enum ScanOptions
 
 	/* set if the query doesn't modify the relation */
 	SO_HINT_REL_READ_ONLY = 1 << 10,
+
+	/* collect scan instrumentation */
+	SO_SCAN_INSTRUMENT = 1 << 11,
 }			ScanOptions;
 
 /*
diff --git a/src/include/commands/explain_state.h b/src/include/commands/explain_state.h
index 5a48bc6fbb1..b412f00f70c 100644
--- a/src/include/commands/explain_state.h
+++ b/src/include/commands/explain_state.h
@@ -55,6 +55,7 @@ typedef struct ExplainState
 	bool		summary;		/* print total planning and execution timing */
 	bool		memory;			/* print planner's memory usage information */
 	bool		settings;		/* print modified settings */
+	bool		io;				/* print info about IO (prefetch, ...) */
 	bool		generic;		/* generate a generic plan */
 	ExplainSerializeOption serialize;	/* serialize the query's output? */
 	ExplainFormat format;		/* output format */
diff --git a/src/include/executor/instrument_node.h b/src/include/executor/instrument_node.h
index 2a0ff377a73..6d1554fbc46 100644
--- a/src/include/executor/instrument_node.h
+++ b/src/include/executor/instrument_node.h
@@ -40,10 +40,59 @@ typedef struct SharedAggInfo
 } SharedAggInfo;
 
 
+/* ---------------------
+ *	Instrumentation information about read streams
+ * ---------------------
+ */
+typedef struct IOStats
+{
+	/* number of buffers returned to consumer (for averaging distance) */
+	uint64		prefetch_count;
+
+	/* sum of pinned_buffers sampled at each buffer return */
+	uint64		distance_sum;
+
+	/* maximum actual pinned_buffers observed during the scan */
+	int16		distance_max;
+
+	/* maximum possible look-ahead distance (max_pinned_buffers) */
+	int16		distance_capacity;
+
+	/* number of waits for a read (for the I/O) */
+	uint64		wait_count;
+
+	/* I/O stats */
+	uint64		io_count;		/* number of I/Os */
+	uint64		io_nblocks;		/* sum of blocks for all I/Os */
+	uint64		io_in_progress; /* sum of in-progress I/Os */
+} IOStats;
+
+typedef struct TableScanInstrumentation
+{
+	IOStats		io;
+} TableScanInstrumentation;
+
+/* merge IO stats from 'src' into 'dst' (sums counters, takes max of maxima) */
+static inline void
+AccumulateIOStats(IOStats *dst, const IOStats *src)
+{
+	dst->prefetch_count += src->prefetch_count;
+	dst->distance_sum += src->distance_sum;
+	if (src->distance_max > dst->distance_max)
+		dst->distance_max = src->distance_max;
+	if (src->distance_capacity > dst->distance_capacity)
+		dst->distance_capacity = src->distance_capacity;
+	dst->wait_count += src->wait_count;
+	dst->io_count += src->io_count;
+	dst->io_nblocks += src->io_nblocks;
+	dst->io_in_progress += src->io_in_progress;
+}
+
 /* ---------------------
  *	Instrumentation information for indexscans (amgettuple and amgetbitmap)
  * ---------------------
  */
+
 typedef struct IndexScanInstrumentation
 {
 	/* Index search count (incremented with pgstat_count_index_scan call) */
@@ -71,6 +120,7 @@ typedef struct BitmapHeapScanInstrumentation
 {
 	uint64		exact_pages;
 	uint64		lossy_pages;
+	TableScanInstrumentation stats;
 } BitmapHeapScanInstrumentation;
 
 /*
diff --git a/src/include/storage/read_stream.h b/src/include/storage/read_stream.h
index c9359b29b0f..6a589510669 100644
--- a/src/include/storage/read_stream.h
+++ b/src/include/storage/read_stream.h
@@ -65,6 +65,7 @@
 
 struct ReadStream;
 typedef struct ReadStream ReadStream;
+typedef struct IOStats IOStats;
 
 /* for block_range_read_stream_cb */
 typedef struct BlockRangeReadStreamPrivate
@@ -103,5 +104,6 @@ extern BlockNumber read_stream_pause(ReadStream *stream);
 extern void read_stream_resume(ReadStream *stream);
 extern void read_stream_reset(ReadStream *stream);
 extern void read_stream_end(ReadStream *stream);
+extern void read_stream_enable_stats(ReadStream *stream, IOStats *stats);
 
 #endif							/* READ_STREAM_H */
diff --git a/src/test/modules/index/expected/killtuples.out b/src/test/modules/index/expected/killtuples.out
index a3db2c40936..550453fece8 100644
--- a/src/test/modules/index/expected/killtuples.out
+++ b/src/test/modules/index/expected/killtuples.out
@@ -8,7 +8,7 @@ step flush: SELECT FROM pg_stat_force_next_flush();
 step disable_seq: SET enable_seqscan = false;
 step disable_bitmap: SET enable_bitmapscan = false;
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                            
 --------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_btree on kill_prior_tuple (actual rows=1.00 loops=1)
@@ -24,7 +24,7 @@ t
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                            
 --------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_btree on kill_prior_tuple (actual rows=1.00 loops=1)
@@ -42,7 +42,7 @@ t
 step delete: DELETE FROM kill_prior_tuple;
 step flush: SELECT FROM pg_stat_force_next_flush();
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                            
 --------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_btree on kill_prior_tuple (actual rows=0.00 loops=1)
@@ -58,7 +58,7 @@ t
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                            
 --------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_btree on kill_prior_tuple (actual rows=0.00 loops=1)
@@ -84,7 +84,7 @@ step flush: SELECT FROM pg_stat_force_next_flush();
 step disable_seq: SET enable_seqscan = false;
 step disable_bitmap: SET enable_bitmapscan = false;
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=1.00 loops=1)
@@ -100,7 +100,7 @@ t
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=1.00 loops=1)
@@ -118,7 +118,7 @@ t
 step delete: DELETE FROM kill_prior_tuple;
 step flush: SELECT FROM pg_stat_force_next_flush();
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=0.00 loops=1)
@@ -134,7 +134,7 @@ t
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=0.00 loops=1)
@@ -161,7 +161,7 @@ step flush: SELECT FROM pg_stat_force_next_flush();
 step disable_seq: SET enable_seqscan = false;
 step disable_bitmap: SET enable_bitmapscan = false;
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=1.00 loops=1)
@@ -177,7 +177,7 @@ t
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=1.00 loops=1)
@@ -195,7 +195,7 @@ t
 step delete: DELETE FROM kill_prior_tuple;
 step flush: SELECT FROM pg_stat_force_next_flush();
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=0.00 loops=1)
@@ -211,7 +211,7 @@ t
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=0.00 loops=1)
@@ -237,7 +237,7 @@ step flush: SELECT FROM pg_stat_force_next_flush();
 step disable_seq: SET enable_seqscan = false;
 step disable_bitmap: SET enable_bitmapscan = false;
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_hash on kill_prior_tuple (actual rows=1.00 loops=1)
@@ -253,7 +253,7 @@ t
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_hash on kill_prior_tuple (actual rows=1.00 loops=1)
@@ -271,7 +271,7 @@ t
 step delete: DELETE FROM kill_prior_tuple;
 step flush: SELECT FROM pg_stat_force_next_flush();
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_hash on kill_prior_tuple (actual rows=0.00 loops=1)
@@ -287,7 +287,7 @@ t
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_hash on kill_prior_tuple (actual rows=0.00 loops=1)
@@ -312,7 +312,7 @@ step flush: SELECT FROM pg_stat_force_next_flush();
 step disable_seq: SET enable_seqscan = false;
 step disable_bitmap: SET enable_bitmapscan = false;
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                             
 ---------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_hash on kill_prior_tuple (actual rows=408.00 loops=1)
@@ -328,7 +328,7 @@ t
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                             
 ---------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_hash on kill_prior_tuple (actual rows=408.00 loops=1)
@@ -346,7 +346,7 @@ t
 step delete: DELETE FROM kill_prior_tuple;
 step flush: SELECT FROM pg_stat_force_next_flush();
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_hash on kill_prior_tuple (actual rows=0.00 loops=1)
@@ -362,7 +362,7 @@ t
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                           
 -------------------------------------------------------------------------------------
 Index Scan using kill_prior_tuple_hash on kill_prior_tuple (actual rows=0.00 loops=1)
@@ -389,7 +389,7 @@ step disable_seq: SET enable_seqscan = false;
 step delete: DELETE FROM kill_prior_tuple;
 step flush: SELECT FROM pg_stat_force_next_flush();
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                
 --------------------------------------------------------------------------
 Bitmap Heap Scan on kill_prior_tuple (actual rows=0.00 loops=1)           
@@ -408,7 +408,7 @@ t
 (1 row)
 
 step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
 QUERY PLAN                                                                
 --------------------------------------------------------------------------
 Bitmap Heap Scan on kill_prior_tuple (actual rows=0.00 loops=1)           
diff --git a/src/test/modules/index/specs/killtuples.spec b/src/test/modules/index/specs/killtuples.spec
index 3b98ff9f76d..a5d5c33d788 100644
--- a/src/test/modules/index/specs/killtuples.spec
+++ b/src/test/modules/index/specs/killtuples.spec
@@ -45,7 +45,7 @@ step measure { UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_
 
 step result { SELECT ((heap_blks_read + heap_blks_hit - counter.heap_accesses) > 0) AS has_new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; }
 
-step access { EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; }
+step access { EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF, IO OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; }
 
 step delete { DELETE FROM kill_prior_tuple; }
 
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index dc31c7ce9f9..d7754c6f2cb 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -68,7 +68,7 @@ select explain_filter('explain select * from int8_tbl i8');
  Seq Scan on int8_tbl i8  (cost=N.N..N.N rows=N width=N)
 (1 row)
 
-select explain_filter('explain (analyze, buffers off) select * from int8_tbl i8');
+select explain_filter('explain (analyze, buffers off, io off) select * from int8_tbl i8');
                                          explain_filter                                          
 -------------------------------------------------------------------------------------------------
  Seq Scan on int8_tbl i8  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N.N loops=N)
@@ -76,7 +76,7 @@ select explain_filter('explain (analyze, buffers off) select * from int8_tbl i8'
  Execution Time: N.N ms
 (3 rows)
 
-select explain_filter('explain (analyze, buffers off, verbose) select * from int8_tbl i8');
+select explain_filter('explain (analyze, buffers off, io off, verbose) select * from int8_tbl i8');
                                              explain_filter                                             
 --------------------------------------------------------------------------------------------------------
  Seq Scan on public.int8_tbl i8  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N.N loops=N)
@@ -392,7 +392,7 @@ select explain_filter('explain (memory) select * from int8_tbl i8');
    Memory: used=NkB  allocated=NkB
 (2 rows)
 
-select explain_filter('explain (memory, analyze, buffers off) select * from int8_tbl i8');
+select explain_filter('explain (memory, analyze, buffers off, io off) select * from int8_tbl i8');
                                          explain_filter                                          
 -------------------------------------------------------------------------------------------------
  Seq Scan on int8_tbl i8  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N.N loops=N)
@@ -740,7 +740,7 @@ select explain_filter('explain (verbose) create table test_ctas as select 1');
 (3 rows)
 
 -- Test SERIALIZE option
-select explain_filter('explain (analyze,buffers off,serialize) select * from int8_tbl i8');
+select explain_filter('explain (analyze,buffers off,io off,serialize) select * from int8_tbl i8');
                                          explain_filter                                          
 -------------------------------------------------------------------------------------------------
  Seq Scan on int8_tbl i8  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N.N loops=N)
@@ -768,7 +768,7 @@ select explain_filter('explain (analyze,serialize binary,buffers,timing) select
 (4 rows)
 
 -- this tests an edge case where we have no data to return
-select explain_filter('explain (analyze,buffers off,serialize) create temp table explain_temp as select * from int8_tbl i8');
+select explain_filter('explain (analyze,buffers off,io off,serialize) create temp table explain_temp as select * from int8_tbl i8');
                                          explain_filter                                          
 -------------------------------------------------------------------------------------------------
  Seq Scan on int8_tbl i8  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N.N loops=N)
@@ -778,7 +778,7 @@ select explain_filter('explain (analyze,buffers off,serialize) create temp table
 (4 rows)
 
 -- Test tuplestore storage usage in Window aggregate (memory case)
-select explain_filter('explain (analyze,buffers off,costs off) select sum(n) over() from generate_series(1,10) a(n)');
+select explain_filter('explain (analyze,buffers off,io off,costs off) select sum(n) over() from generate_series(1,10) a(n)');
                                   explain_filter                                  
 ----------------------------------------------------------------------------------
  WindowAgg (actual time=N.N..N.N rows=N.N loops=N)
@@ -791,7 +791,7 @@ select explain_filter('explain (analyze,buffers off,costs off) select sum(n) ove
 
 -- Test tuplestore storage usage in Window aggregate (disk case)
 set work_mem to 64;
-select explain_filter('explain (analyze,buffers off,costs off) select sum(n) over() from generate_series(1,2500) a(n)');
+select explain_filter('explain (analyze,buffers off,io off,costs off) select sum(n) over() from generate_series(1,2500) a(n)');
                                   explain_filter                                  
 ----------------------------------------------------------------------------------
  WindowAgg (actual time=N.N..N.N rows=N.N loops=N)
@@ -803,7 +803,7 @@ select explain_filter('explain (analyze,buffers off,costs off) select sum(n) ove
 (6 rows)
 
 -- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk)
-select explain_filter('explain (analyze,buffers off,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2500) a(n))');
+select explain_filter('explain (analyze,buffers off,io off,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2500) a(n))');
                                      explain_filter                                     
 ----------------------------------------------------------------------------------------
  WindowAgg (actual time=N.N..N.N rows=N.N loops=N)
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 1e6e020fea8..55d5a52d96b 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -39,7 +39,7 @@ declare
   line text;
 begin
   for line in
-    execute 'explain (analyze, costs off, summary off, timing off, buffers off) ' || query
+    execute 'explain (analyze, costs off, summary off, timing off, buffers off, io off) ' || query
   loop
     out_line := regexp_replace(line, '\d+kB', 'NNkB', 'g');
     return next;
@@ -55,7 +55,7 @@ declare
   element jsonb;
   matching_nodes jsonb := '[]'::jsonb;
 begin
-  execute 'explain (analyze, costs off, summary off, timing off, buffers off, format ''json'') ' || query into strict elements;
+  execute 'explain (analyze, costs off, summary off, timing off, buffers off, io off, format ''json'') ' || query into strict elements;
   while jsonb_array_length(elements) > 0 loop
     element := elements->0;
     elements := elements - 0;
diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out
index 218972dfab8..950c0fbae69 100644
--- a/src/test/regress/expected/memoize.out
+++ b/src/test/regress/expected/memoize.out
@@ -10,7 +10,7 @@ declare
     ln text;
 begin
     for ln in
-        execute format('explain (analyze, costs off, summary off, timing off, buffers off) %s',
+        execute format('explain (analyze, costs off, summary off, timing off, buffers off, io off) %s',
             query)
     loop
         if hide_hitmiss = true then
diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out
index 9cb1d87066a..f1331c307d3 100644
--- a/src/test/regress/expected/merge.out
+++ b/src/test/regress/expected/merge.out
@@ -1623,7 +1623,7 @@ $$
 DECLARE ln text;
 BEGIN
     FOR ln IN
-        EXECUTE 'explain (analyze, timing off, summary off, costs off, buffers off) ' ||
+        EXECUTE 'explain (analyze, timing off, summary off, costs off, buffers off, io off) ' ||
 		  query
     LOOP
         ln := regexp_replace(ln, '(Memory( Usage)?|Buckets|Batches): \S*',  '\1: xxx', 'g');
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index deacdd75807..1012be91689 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -11,7 +11,7 @@ declare
     ln text;
 begin
     for ln in
-        execute format('explain (analyze, costs off, summary off, timing off, buffers off) %s',
+        execute format('explain (analyze, costs off, summary off, timing off, buffers off, io off) %s',
             query)
     loop
         ln := regexp_replace(ln, 'Maximum Storage: \d+', 'Maximum Storage: N');
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 933921d1860..bfaf10a2906 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -580,7 +580,7 @@ select count(*) from bmscantest where a>1;
 -- test accumulation of stats for parallel nodes
 reset enable_seqscan;
 alter table tenk2 set (parallel_workers = 0);
-explain (analyze, timing off, summary off, costs off, buffers off)
+explain (analyze, timing off, summary off, costs off, buffers off, io off)
    select count(*) from tenk1, tenk2 where tenk1.hundred > 1
         and tenk2.thousand=0;
                                  QUERY PLAN                                  
@@ -606,7 +606,7 @@ $$
 declare ln text;
 begin
     for ln in
-        explain (analyze, timing off, summary off, costs off, buffers off)
+        explain (analyze, timing off, summary off, costs off, buffers off, io off)
           select * from
           (select ten from tenk1 where ten < 100 order by ten) ss
           right join (values (1),(2),(3)) v(x) on true
@@ -1170,7 +1170,7 @@ explain (costs off)
 -- to increase the parallel query test coverage
 SAVEPOINT settings;
 SET LOCAL debug_parallel_query = 1;
-EXPLAIN (analyze, timing off, summary off, costs off, buffers off) SELECT * FROM tenk1;
+EXPLAIN (analyze, timing off, summary off, costs off, buffers off, io off) SELECT * FROM tenk1;
                            QUERY PLAN                           
 ----------------------------------------------------------------
  Gather (actual rows=10000.00 loops=1)
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 200236a0a69..0a1de6d7259 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -2051,7 +2051,7 @@ $$
 declare ln text;
 begin
     for ln in
-        explain (analyze, summary off, timing off, costs off, buffers off)
+        explain (analyze, summary off, timing off, costs off, buffers off, io off)
         select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3
     loop
         ln := regexp_replace(ln, 'Memory: \S*',  'Memory: xxx');
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index 8f10e1aff55..dbd210a07f2 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -63,8 +63,8 @@ set track_io_timing = off;
 
 explain (costs off) select 1 as a, 2 as b having false;
 select explain_filter('explain select * from int8_tbl i8');
-select explain_filter('explain (analyze, buffers off) select * from int8_tbl i8');
-select explain_filter('explain (analyze, buffers off, verbose) select * from int8_tbl i8');
+select explain_filter('explain (analyze, buffers off, io off) select * from int8_tbl i8');
+select explain_filter('explain (analyze, buffers off, io off, verbose) select * from int8_tbl i8');
 select explain_filter('explain (analyze, buffers, format text) select * from int8_tbl i8');
 select explain_filter('explain (buffers, format text) select * from int8_tbl i8');
 
@@ -105,7 +105,7 @@ select explain_filter('explain (analyze, generic_plan) select unique1 from tenk1
 
 -- MEMORY option
 select explain_filter('explain (memory) select * from int8_tbl i8');
-select explain_filter('explain (memory, analyze, buffers off) select * from int8_tbl i8');
+select explain_filter('explain (memory, analyze, buffers off, io off) select * from int8_tbl i8');
 select explain_filter('explain (memory, summary, format yaml) select * from int8_tbl i8');
 select explain_filter('explain (memory, analyze, format json) select * from int8_tbl i8');
 prepare int8_query as select * from int8_tbl i8;
@@ -177,17 +177,17 @@ select explain_filter('explain (verbose) declare test_cur cursor for select * fr
 select explain_filter('explain (verbose) create table test_ctas as select 1');
 
 -- Test SERIALIZE option
-select explain_filter('explain (analyze,buffers off,serialize) select * from int8_tbl i8');
+select explain_filter('explain (analyze,buffers off,io off,serialize) select * from int8_tbl i8');
 select explain_filter('explain (analyze,serialize text,buffers,timing off) select * from int8_tbl i8');
 select explain_filter('explain (analyze,serialize binary,buffers,timing) select * from int8_tbl i8');
 -- this tests an edge case where we have no data to return
-select explain_filter('explain (analyze,buffers off,serialize) create temp table explain_temp as select * from int8_tbl i8');
+select explain_filter('explain (analyze,buffers off,io off,serialize) create temp table explain_temp as select * from int8_tbl i8');
 
 -- Test tuplestore storage usage in Window aggregate (memory case)
-select explain_filter('explain (analyze,buffers off,costs off) select sum(n) over() from generate_series(1,10) a(n)');
+select explain_filter('explain (analyze,buffers off,io off,costs off) select sum(n) over() from generate_series(1,10) a(n)');
 -- Test tuplestore storage usage in Window aggregate (disk case)
 set work_mem to 64;
-select explain_filter('explain (analyze,buffers off,costs off) select sum(n) over() from generate_series(1,2500) a(n)');
+select explain_filter('explain (analyze,buffers off,io off,costs off) select sum(n) over() from generate_series(1,2500) a(n)');
 -- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk)
-select explain_filter('explain (analyze,buffers off,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2500) a(n))');
+select explain_filter('explain (analyze,buffers off,io off,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2500) a(n))');
 reset work_mem;
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index bbe658a7588..1c8036faade 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -21,7 +21,7 @@ declare
   line text;
 begin
   for line in
-    execute 'explain (analyze, costs off, summary off, timing off, buffers off) ' || query
+    execute 'explain (analyze, costs off, summary off, timing off, buffers off, io off) ' || query
   loop
     out_line := regexp_replace(line, '\d+kB', 'NNkB', 'g');
     return next;
@@ -38,7 +38,7 @@ declare
   element jsonb;
   matching_nodes jsonb := '[]'::jsonb;
 begin
-  execute 'explain (analyze, costs off, summary off, timing off, buffers off, format ''json'') ' || query into strict elements;
+  execute 'explain (analyze, costs off, summary off, timing off, buffers off, io off, format ''json'') ' || query into strict elements;
   while jsonb_array_length(elements) > 0 loop
     element := elements->0;
     elements := elements - 0;
diff --git a/src/test/regress/sql/memoize.sql b/src/test/regress/sql/memoize.sql
index e39bbb65391..532df97ee90 100644
--- a/src/test/regress/sql/memoize.sql
+++ b/src/test/regress/sql/memoize.sql
@@ -11,7 +11,7 @@ declare
     ln text;
 begin
     for ln in
-        execute format('explain (analyze, costs off, summary off, timing off, buffers off) %s',
+        execute format('explain (analyze, costs off, summary off, timing off, buffers off, io off) %s',
             query)
     loop
         if hide_hitmiss = true then
diff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql
index 2660b19f238..c82e194078e 100644
--- a/src/test/regress/sql/merge.sql
+++ b/src/test/regress/sql/merge.sql
@@ -1074,7 +1074,7 @@ $$
 DECLARE ln text;
 BEGIN
     FOR ln IN
-        EXECUTE 'explain (analyze, timing off, summary off, costs off, buffers off) ' ||
+        EXECUTE 'explain (analyze, timing off, summary off, costs off, buffers off, io off) ' ||
 		  query
     LOOP
         ln := regexp_replace(ln, '(Memory( Usage)?|Buckets|Batches): \S*',  '\1: xxx', 'g');
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index d93c0c03bab..212de0e6285 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -12,7 +12,7 @@ declare
     ln text;
 begin
     for ln in
-        execute format('explain (analyze, costs off, summary off, timing off, buffers off) %s',
+        execute format('explain (analyze, costs off, summary off, timing off, buffers off, io off) %s',
             query)
     loop
         ln := regexp_replace(ln, 'Maximum Storage: \d+', 'Maximum Storage: N');
diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql
index 71a75bc86ea..56e1ceac492 100644
--- a/src/test/regress/sql/select_parallel.sql
+++ b/src/test/regress/sql/select_parallel.sql
@@ -230,7 +230,7 @@ select count(*) from bmscantest where a>1;
 -- test accumulation of stats for parallel nodes
 reset enable_seqscan;
 alter table tenk2 set (parallel_workers = 0);
-explain (analyze, timing off, summary off, costs off, buffers off)
+explain (analyze, timing off, summary off, costs off, buffers off, io off)
    select count(*) from tenk1, tenk2 where tenk1.hundred > 1
         and tenk2.thousand=0;
 alter table tenk2 reset (parallel_workers);
@@ -242,7 +242,7 @@ $$
 declare ln text;
 begin
     for ln in
-        explain (analyze, timing off, summary off, costs off, buffers off)
+        explain (analyze, timing off, summary off, costs off, buffers off, io off)
           select * from
           (select ten from tenk1 where ten < 100 order by ten) ss
           right join (values (1),(2),(3)) v(x) on true
@@ -450,7 +450,7 @@ explain (costs off)
 -- to increase the parallel query test coverage
 SAVEPOINT settings;
 SET LOCAL debug_parallel_query = 1;
-EXPLAIN (analyze, timing off, summary off, costs off, buffers off) SELECT * FROM tenk1;
+EXPLAIN (analyze, timing off, summary off, costs off, buffers off, io off) SELECT * FROM tenk1;
 ROLLBACK TO SAVEPOINT settings;
 
 -- provoke error in worker
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 4cd016f4ac3..3e930d8b56d 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -1016,7 +1016,7 @@ $$
 declare ln text;
 begin
     for ln in
-        explain (analyze, summary off, timing off, costs off, buffers off)
+        explain (analyze, summary off, timing off, costs off, buffers off, io off)
         select * from (select pk,c2 from sq_limit order by c1,pk) as x limit 3
     loop
         ln := regexp_replace(ln, 'Memory: \S*',  'Memory: xxx');
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 5bc517602b1..087bb8fcd4a 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1266,6 +1266,7 @@ IOContext
 IOFuncSelector
 IOObject
 IOOp
+IOStats
 IO_STATUS_BLOCK
 IPCompareMethod
 ITEM
@@ -3104,6 +3105,7 @@ TableLikeClause
 TableSampleClause
 TableScanDesc
 TableScanDescData
+TableScanInstrumentation
 TableSpaceCacheEntry
 TableSpaceOpts
 TableToProcess
-- 
2.53.0

