From 2e7c65b60710f067aec410a7d02799a3b53ef3c8 Mon Sep 17 00:00:00 2001
From: Greg Burd <greg@burd.me>
Date: Mon, 15 Jun 2026 14:22:45 -0400
Subject: [PATCH v48 9/9] [DO NOT MERGE] Add a HOT/SIU benchmark harness

A/B and single-variant benchmark scripts for HOT-indexed updates: build two
postgres variants, run pgbench workloads exercising classic-HOT, non-HOT, and
HOT-indexed paths, and a self-contained bloat probe that reports the skip count
(index writes avoided on unchanged indexes) and changed-index bounding.  Not
for merge; kept for evaluating the feature.
---
 src/test/benchmarks/siu/README.md             |  82 ++++
 src/test/benchmarks/siu/scripts/bloat.sh      |  84 ++++
 src/test/benchmarks/siu/scripts/build.sh      |  54 +++
 .../siu/scripts/hot_indexed_mixed.sql         |  11 +
 .../siu/scripts/hot_indexed_update.sql        |   6 +
 .../benchmarks/siu/scripts/read_indexscan.sql |  11 +
 src/test/benchmarks/siu/scripts/run.sh        | 377 ++++++++++++++++++
 src/test/benchmarks/siu/scripts/soak.sh       | 128 ++++++
 .../benchmarks/siu/scripts/wide_update.sql    |   7 +
 9 files changed, 760 insertions(+)
 create mode 100644 src/test/benchmarks/siu/README.md
 create mode 100755 src/test/benchmarks/siu/scripts/bloat.sh
 create mode 100755 src/test/benchmarks/siu/scripts/build.sh
 create mode 100644 src/test/benchmarks/siu/scripts/hot_indexed_mixed.sql
 create mode 100644 src/test/benchmarks/siu/scripts/hot_indexed_update.sql
 create mode 100644 src/test/benchmarks/siu/scripts/read_indexscan.sql
 create mode 100755 src/test/benchmarks/siu/scripts/run.sh
 create mode 100755 src/test/benchmarks/siu/scripts/soak.sh
 create mode 100644 src/test/benchmarks/siu/scripts/wide_update.sql

diff --git a/src/test/benchmarks/siu/README.md b/src/test/benchmarks/siu/README.md
new file mode 100644
index 00000000000..9a62d268ad7
--- /dev/null
+++ b/src/test/benchmarks/siu/README.md
@@ -0,0 +1,82 @@
+# HOT-indexed (SIU) A/B benchmark harness
+
+Two postgres variants, identical pgdata layouts, pgbench workloads
+exercising classic HOT, non-HOT, and HOT-indexed paths.
+
+## Contents
+
+- `scripts/build.sh` -- builds two postgres variants (`master` = tepid's
+  merge-base with origin/master; `tepid` = the branch under test).  Requires
+  a writable benchmark root via `BENCH` (default `/scratch/siu-bench`).
+- `scripts/run.sh` -- A/B driver.  Runs `simple_update` (pgbench -N),
+  `hot_indexed_update`, `hot_indexed_mixed`, `read_indexscan`, and `wide_N`
+  for N in `$WIDE_STEPS`.
+  Collects TPS, latency, WAL bytes, HOT update count, pre/post heap and
+  index size, peak CPU% and RSS.  Writes a CSV per run to `$BENCH/results/`.
+- `scripts/soak.sh` -- long-running single-workload driver that samples
+  TPS/HOT%/WAL/bloat every `$SAMPLE` seconds under `$DURATION` seconds
+  of constant pressure, per variant.
+- `scripts/bloat.sh` -- single-variant bloat probe.  Runs update+vacuum cycles
+  on a table whose changed column and an unchanged column are both indexed, and
+  reports (via pgstattuple) that the changed index stays bounded with periodic
+  VACUUM but grows unbounded without it, plus the skip count showing
+  HOT-indexed updates avoiding the unchanged index.  Spins its own throwaway
+  cluster, so it does not touch the A/B pgdata.
+- `scripts/hot_indexed_update.sql` -- `UPDATE siu_table SET b = rand WHERE a = rand`.
+- `scripts/hot_indexed_mixed.sql`  -- PK SELECT in every transaction; 20 % of them also UPDATE an indexed column.
+- `scripts/read_indexscan.sql` -- read-only btree index scans on a freshly
+  reset `siu_table` (no stale entries); confirms the HOT-indexed read path
+  adds no per-scan overhead, since the crossed-attribute bitmap decides
+  staleness without a key comparison and the scan requests no index tuple.
+- `scripts/wide_update.sql` -- driver script for the wide-table workload;
+  the `SET` clause is built at run time from `$WIDE_STEPS`.
+
+## Running
+
+```
+# Build both variants (run once per benchmark host)
+REPO=$HOME/ws/postgres/tepid BENCH=/scratch/tepid-bench \
+  ./scripts/build.sh
+
+# Standard A/B
+SCALE=20 CLIENTS=16 THREADS=8 DURATION=120 \
+  WIDE_COLS=16 WIDE_STEPS=0,1,2,4,8,16 \
+  ./scripts/run.sh
+
+# Soak
+SCALE=50 CLIENTS=16 THREADS=8 DURATION=900 SAMPLE=60 \
+  ./scripts/soak.sh
+
+# Bloat probe (single variant; defaults to the tepid build)
+BENCH=/scratch/tepid-bench ROWS=5000 CYCLES=8 UPDATES=20 \
+  ./scripts/bloat.sh
+```
+
+## Env vars
+
+```
+REPO         path to postgres source (has .git)
+BENCH        bench root (install prefixes, build trees, results)
+SCALE        pgbench -s (also drives siu_table row count = SCALE*100k)
+CLIENTS      pgbench -c
+THREADS      pgbench -j
+DURATION     seconds per workload
+WIDE_COLS    number of indexed int columns in wide_table (default 16)
+WIDE_STEPS   comma-separated list of columns-modified values to exercise
+             (default 0,1,4,8,16)
+PORT         postgres port for the bench servers
+SHARED_BUFFERS  postgresql.conf setting (default 512MB; soak.sh defaults to 2GB)
+MASTER_REV   revision for the master variant (default: tepid's merge-base
+             with origin/master)
+TEPID_REV    revision for the tepid variant (default: tepid)
+
+bloat.sh only:
+BINDIR       postgres bin dir to test (default: tepid variant under $BENCH)
+ROWS         seeded rows (default 5000)
+CYCLES       update+vacuum cycles (default 8)
+UPDATES      updates per row per cycle (default 20)
+```
+
+The scripts are portable between Linux and FreeBSD; the CPU/RSS sampler
+lists the postmaster's children with `pgrep -P LEADER` and sums
+`ps -o pcpu=,rss= -p PIDS` over them -- peak values are approximate.
diff --git a/src/test/benchmarks/siu/scripts/bloat.sh b/src/test/benchmarks/siu/scripts/bloat.sh
new file mode 100755
index 00000000000..34171c68f5b
--- /dev/null
+++ b/src/test/benchmarks/siu/scripts/bloat.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+# Single-variant bloat benchmark for HOT-indexed (SIU) updates.
+#
+# The A/B run.sh measures TPS/WAL/aggregate size; it does not isolate two
+# bloat properties of the HOT-indexed path, which this script demonstrates:
+#
+#   1. Stale index entries on the CHANGED index accumulate between vacuums,
+#      but VACUUM reclaims them, so size stays bounded across update+vacuum
+#      cycles.  Skipping vacuum lets them grow unbounded (the inherent
+#      inter-vacuum bloat; read-filtered by the crossed-attribute bitmap meanwhile).
+#   2. An index on an UNCHANGED column is skipped by HOT-indexed updates (the
+#      selective-update benefit) -- visible as a skip count.  Updates that fall
+#      back to non-HOT (e.g. when the page has no room for the chain) still
+#      insert into it, so its size reflects only the non-HOT remainder.
+#
+# Uses the tepid variant built by build.sh (override BINDIR for any build) and
+# a throwaway cluster under $BENCH, so it never touches the A/B pgdata.
+#
+# Env: BENCH (default /scratch/siu-bench), BINDIR (default tepid variant bin),
+#      PORT (default 57481), ROWS (default 5000), CYCLES (default 8),
+#      UPDATES (updates per row per cycle, default 20).
+set -euo pipefail
+
+BENCH=${BENCH:-/scratch/siu-bench}
+BINDIR=${BINDIR:-$BENCH/tepid/usr/local/pgsql/bin}
+PORT=${PORT:-57481}
+ROWS=${ROWS:-5000}
+CYCLES=${CYCLES:-8}
+UPDATES=${UPDATES:-20}
+DATADIR=$BENCH/_data_bloat   # throwaway cluster; removed and re-initialised below
+
+base=$(dirname "$BINDIR")   # install prefix: the parent of bin/
+if [ -d "$base/lib64" ]; then
+  export LD_LIBRARY_PATH="$base/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+else
+  export LD_LIBRARY_PATH="$base/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+fi
+
+PSQL=("$BINDIR/psql" -h /tmp -p "$PORT" -U postgres -X -q)
+P() { "${PSQL[@]}" -At "$@"; }   # psql with unaligned, tuples-only output
+
+"$BINDIR/pg_ctl" -D "$DATADIR" stop -m fast >/dev/null 2>&1 || true   # stop a leftover server from an earlier run, if any
+rm -rf "$DATADIR"
+"$BINDIR/initdb" -D "$DATADIR" -U postgres --no-sync >/dev/null
+cat >> "$DATADIR/postgresql.conf" <<EOF
+port = $PORT
+autovacuum = off
+fsync = off
+EOF
+"$BINDIR/pg_ctl" -D "$DATADIR" -o "-p $PORT" -l "$BENCH/_bloat.log" -w start >/dev/null
+trap '"$BINDIR/pg_ctl" -D "$DATADIR" stop -m fast >/dev/null 2>&1 || true' EXIT   # shut the throwaway server down on any exit
+"${PSQL[@]}" -c "CREATE EXTENSION IF NOT EXISTS pgstattuple;" >/dev/null   # pgstattuple() is used by run_arm's report
+
+# $1 = table name, $2 = vacuum_each (1/0).  Returns final idx_a composition.
+run_arm() {
+  local rel=$1 vacuum_each=$2 round
+  "${PSQL[@]}" <<SQL >/dev/null
+DROP TABLE IF EXISTS $rel;
+CREATE TABLE $rel (id int PRIMARY KEY, a int, b int, pad text) WITH (fillfactor = 50);
+CREATE INDEX ${rel}_a ON $rel(a);   -- changed column
+CREATE INDEX ${rel}_b ON $rel(b);   -- never changed
+INSERT INTO $rel SELECT g, g, g, repeat('x', 40) FROM generate_series(1, $ROWS) g;
+VACUUM (FREEZE, ANALYZE) $rel;
+SQL
+  # Each round bumps column a on every row $UPDATES times, then optionally vacuums.
+  for ((round = 1; round <= CYCLES; round++)); do
+    "${PSQL[@]}" -c "DO \$\$ BEGIN FOR u IN 1..$UPDATES LOOP UPDATE $rel SET a = a + 1; END LOOP; END \$\$;" >/dev/null
+    if [ "$vacuum_each" = 1 ]; then "${PSQL[@]}" -c "VACUUM $rel;" >/dev/null; fi
+  done
+  P -c "SELECT '$rel(vacuum_each=$vacuum_each):'
+        || ' idx_a_kb=' || pg_relation_size('${rel}_a')/1024
+        || ' idx_a_live=' || (SELECT tuple_count FROM pgstattuple('${rel}_a'))
+        || ' idx_a_free%=' || round((SELECT free_percent FROM pgstattuple('${rel}_a'))::numeric,1)
+        || ' idx_b_kb=' || pg_relation_size('${rel}_b')/1024
+        || ' idx_b_skips=' || coalesce((SELECT n_tup_hot_indexed_upd_skipped
+                                        FROM pg_stat_all_indexes WHERE indexrelname='${rel}_b'), 0);"
+}
+
+echo "=== HOT-indexed bloat: $CYCLES cycles x ($UPDATES updates/row x $ROWS rows) ==="
+run_arm t_vac 1
+run_arm t_novac 0
+echo "idx_a: bounded with vacuum_each=1, unbounded with =0 (stale entries accumulate"
+echo "until reclaimed).  idx_b_skips counts entries the HOT-indexed path avoided on"
+echo "the unchanged index; idx_b_kb is only the non-HOT-fallback remainder."
diff --git a/src/test/benchmarks/siu/scripts/build.sh b/src/test/benchmarks/siu/scripts/build.sh
new file mode 100755
index 00000000000..b2f0ee525d4
--- /dev/null
+++ b/src/test/benchmarks/siu/scripts/build.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# Build two postgres variants for tepid (HOT-indexed) A/B benchmarks.
+#
+# Env vars (all optional):
+#   REPO          -- path to postgres source repo (default: first of $HOME/ws/postgres/tepid, $BENCH/repo, /scratch/pg containing .git)
+#   BENCH         -- bench root (default: /scratch/siu-bench)
+#   MASTER_REV    -- revision for the "master" variant (default: tepid's merge-base with origin/master)
+#   TEPID_REV     -- revision for the "tepid"  variant (default: tepid)
+#   JOBS          -- parallel compile jobs (default: nproc or 8)
+set -euo pipefail
+
+BENCH=${BENCH:-/scratch/siu-bench}
+JOBS=${JOBS:-$( (command -v nproc >/dev/null && nproc) || sysctl -n hw.ncpu 2>/dev/null || echo 8 )}
+if [ -z "${REPO:-}" ]; then
+  for candidate in "$HOME/ws/postgres/tepid" "$BENCH/repo" /scratch/pg; do
+    if [ -d "$candidate/.git" ]; then REPO=$candidate; break; fi
+  done
+fi
+: "${REPO:?REPO not set and no default found}"
+cd "$REPO"
+
+TEPID_REV=${TEPID_REV:-tepid}
+MASTER_REV=${MASTER_REV:-$(git merge-base "$TEPID_REV" origin/master 2>/dev/null || git merge-base "$TEPID_REV" master)}
+
+echo "REPO=$REPO  MASTER=$MASTER_REV  TEPID=$TEPID_REV  JOBS=$JOBS  BENCH=$BENCH"
+
+die() { printf 'build: %s\n' "$*" >&2; exit 1; }
+# Test captured output: `... | grep -q .` can SIGPIPE its producer, which pipefail reports as failure.
+[ -z "$(git status --porcelain --untracked-files=no)" ] ||
+  die "repo has unstaged/uncommitted changes; stash or commit first"
+
+build_variant() {
+  local name=$1      # variant label; also the install dir and extra_version suffix
+  local rev=$2       # git revision to check out and build
+  local prefix=$BENCH/$name
+  echo "=== building $name ($rev) into $prefix"
+  [ -d "$prefix" ] && find "$prefix" -mindepth 1 -delete && rmdir "$prefix"   # drop any previous install
+  mkdir -p "$prefix"
+  git checkout --quiet --detach "$rev"
+  local bld=$BENCH/_build_$name   # per-variant meson build tree, recreated from scratch
+  [ -d "$bld" ] && find "$bld" -mindepth 1 -delete && rmdir "$bld"
+  meson setup "$bld" --prefix="$prefix/usr/local/pgsql" \
+    -Dbuildtype=release -Dcassert=false \
+    -Dextra_version=-siubench-$name >/dev/null
+  meson compile -C "$bld" -j "$JOBS"
+  meson install -C "$bld" --destdir=/ >/dev/null
+  "$prefix/usr/local/pgsql/bin/postgres" --version   # smoke-test the installed binary
+}
+
+ORIG=$(git symbolic-ref --quiet --short HEAD || git rev-parse HEAD)
+trap 'git checkout --quiet "$ORIG"' EXIT
+
+build_variant master "$MASTER_REV"
+build_variant tepid  "$TEPID_REV"
diff --git a/src/test/benchmarks/siu/scripts/hot_indexed_mixed.sql b/src/test/benchmarks/siu/scripts/hot_indexed_mixed.sql
new file mode 100644
index 00000000000..3ab3289df27
--- /dev/null
+++ b/src/test/benchmarks/siu/scripts/hot_indexed_mixed.sql
@@ -0,0 +1,11 @@
+-- Mixed workload: every transaction SELECTs one row; 20% also UPDATE an indexed column.
+-- Exercises both the hot-indexed writer and the crossed-attribute-bitmap reader.
+\set aid random(1, :scale * 100000)
+\set bid random(1, 1000000)
+\set which random(1, 100)
+BEGIN;
+SELECT * FROM siu_table WHERE a = :aid;
+\if :which > 80
+  UPDATE siu_table SET b = :bid WHERE a = :aid;
+\endif
+COMMIT;
diff --git a/src/test/benchmarks/siu/scripts/hot_indexed_update.sql b/src/test/benchmarks/siu/scripts/hot_indexed_update.sql
new file mode 100644
index 00000000000..f1bcf959c67
--- /dev/null
+++ b/src/test/benchmarks/siu/scripts/hot_indexed_update.sql
@@ -0,0 +1,6 @@
+-- hot-indexed-friendly workload: narrow table with a few non-PK indexes.
+-- Each UPDATE changes a non-summarizing indexed column on a random row.
+-- With hot-indexed this is HOT-indexed; without hot-indexed it is non-HOT.
+\set aid random(1, :scale * 100000)
+\set new_b random(1, 1000000)
+UPDATE siu_table SET b = :new_b WHERE a = :aid;
diff --git a/src/test/benchmarks/siu/scripts/read_indexscan.sql b/src/test/benchmarks/siu/scripts/read_indexscan.sql
new file mode 100644
index 00000000000..465689763ea
--- /dev/null
+++ b/src/test/benchmarks/siu/scripts/read_indexscan.sql
@@ -0,0 +1,11 @@
+-- read_indexscan: read-only btree index-scan workload confirming the
+-- HOT-indexed read path adds no per-scan overhead on tables with no stale
+-- entries.
+-- The crossed-attribute bitmap decides staleness without a key comparison and
+-- the scan does not request the index tuple, so on a freshly reset siu_table
+-- (no stale HOT-indexed entries) there should be no master-vs-tepid
+-- difference on this cell.  The predicate is an equality on the
+-- indexed column b and the target list includes the non-indexed column e,
+-- forcing a plain (heap-fetching) index scan rather than an index-only scan.
+\set id random(1, :rows)
+SELECT a, b, c, d, e FROM siu_table WHERE b = :id;
diff --git a/src/test/benchmarks/siu/scripts/run.sh b/src/test/benchmarks/siu/scripts/run.sh
new file mode 100755
index 00000000000..fa330ad4f6f
--- /dev/null
+++ b/src/test/benchmarks/siu/scripts/run.sh
@@ -0,0 +1,377 @@
+#!/usr/bin/env bash
+# A/B pgbench harness for tepid: master (upstream) vs tepid (HOT-indexed).
+#
+# Env vars:
+#   SCALE       -- pgbench -s (also multiplier for siu_table row count = SCALE*100k)
+#   CLIENTS     -- pgbench -c
+#   THREADS     -- pgbench -j
+#   DURATION    -- pgbench -T (seconds per workload)
+#   WIDE_COLS   -- # of indexed columns in the wide_table (default 16)
+#   WIDE_STEPS  -- comma-separated list of "updated columns" counts for
+#                 the wide workload (default "0,1,4,8,16")
+#   PORT        -- postgres port (default 57480)
+#
+# For each variant in {master, tepid}:
+#   initdb fresh pgdata, start postgres, create test objects,
+#   run workloads (pgbench -N simple_update, hot_indexed_update, hot_indexed_mixed,
+#   read_indexscan, and wide_N for each value in WIDE_STEPS), collect TPS + HOT
+#   counts + WAL delta + peak CPU/RSS sampled via pgrep + ps.
+# Emits a CSV under $BENCH/results/ and per-workload logs under $BENCH/logs/.
+set -euo pipefail
+
+BENCH=${BENCH:-/scratch/siu-bench}
+SCALE=${SCALE:-20}
+CLIENTS=${CLIENTS:-16}
+THREADS=${THREADS:-8}
+DURATION=${DURATION:-120}
+WIDE_COLS=${WIDE_COLS:-16}
+WIDE_STEPS=${WIDE_STEPS:-0,1,4,8,16}
+PORT=${PORT:-57480}
+
+TS=$(date -u +%Y%m%dT%H%M%SZ)
+OUT=$BENCH/results/$TS.csv
+LOGDIR=$BENCH/logs/$TS
+mkdir -p "$LOGDIR" "$(dirname "$OUT")"   # results/ is created nowhere else; the header write below needs it
+echo "variant,workload,tps,latency_avg_ms,classic_hot_updates,hot_indexed_updates,non_hot_updates,total_updates,wal_bytes,bloat_pages_before,bloat_pages_after,index_size_before,index_size_after,cpu_pct_peak,rss_mib_peak,per_index_before,per_index_after" > "$OUT"
+echo "=== siu-bench A/B run $TS -> $OUT (scale=$SCALE clients=$CLIENTS threads=$THREADS duration=${DURATION}s)"
+
+bin_of() {
+  printf '%s\n' "$BENCH/$1/usr/local/pgsql/bin"
+}
+
+LD_of() {
+  # Print the library directory of variant $1: lib64 on distros that split
+  # 64-bit libraries out, plain lib everywhere else.
+  local libdir=$BENCH/$1/usr/local/pgsql/lib
+  if [ -d "${libdir}64" ]; then
+    libdir=${libdir}64
+  fi
+  echo "$libdir"
+}
+
+psql_as() {
+  local variant=$1; shift   # remaining arguments go to psql verbatim
+  LD_LIBRARY_PATH="$(LD_of "$variant")" "$(bin_of "$variant")/psql" -h /tmp -p "$PORT" -U postgres -X "$@"
+}
+
+pgbench_as() {
+  local variant=$1; shift   # remaining arguments go to pgbench verbatim
+  LD_LIBRARY_PATH="$(LD_of "$variant")" "$(bin_of "$variant")/pgbench" -h /tmp -p "$PORT" -U postgres "$@"
+}
+
+start_pg() {
+  local v=$1
+  local datadir=$BENCH/_data_$v
+  [ -d "$datadir" ] && find "$datadir" -mindepth 1 -delete && rmdir "$datadir"   # always start from an empty pgdata
+  mkdir -p "$datadir"
+
+  LD_LIBRARY_PATH="$(LD_of "$v")" "$(bin_of "$v")/initdb" -D "$datadir" -U postgres >"$LOGDIR/initdb_$v.log" 2>&1
+  local sb=${SHARED_BUFFERS:-512MB}   # overridable through the SHARED_BUFFERS env var
+  cat >> "$datadir/postgresql.conf" <<EOF
+shared_buffers = $sb
+work_mem = 32MB
+max_wal_size = 4GB
+synchronous_commit = on
+checkpoint_timeout = 10min
+wal_level = replica
+log_destination = 'stderr'
+logging_collector = off
+port = $PORT
+EOF
+  LD_LIBRARY_PATH="$(LD_of "$v")" "$(bin_of "$v")/pg_ctl" -D "$datadir" \
+    -o "-p $PORT" -l "$LOGDIR/pg_$v.log" start >/dev/null
+  sleep 2   # NOTE(review): fixed settle delay after pg_ctl start -- confirm it is still needed
+}
+
+stop_pg() {
+  # Best-effort fast shutdown of variant $1; failures (e.g. no server running) are ignored.
+  local variant=$1
+  LD_LIBRARY_PATH="$(LD_of "$variant")" "$(bin_of "$variant")/pg_ctl" -D "$BENCH/_data_$variant" stop -m fast >/dev/null 2>&1 || true
+}
+
+postmaster_pid() {
+  # Print the first line of variant $1's postmaster.pid; print nothing, and still succeed, if it is absent.
+  head -n 1 "$BENCH/_data_$1/postmaster.pid" 2>/dev/null || true
+}
+
+setup_schemas() {
+  local v=$1
+  seed_siu_table "$v"
+  seed_wide_table "$v"
+  # pgbench's own tables back the built-in simple_update (-N) workload;
+  # initialise them at the same scale factor as the custom tables.
+  pgbench_as "$v" -i -s "$SCALE" -q postgres >"$LOGDIR/pgbench_init_$v.log" 2>&1
+}
+
+# seed_siu_table: (re)create the narrow table used by the siu_* workloads.
+seed_siu_table() {
+  local v=$1 rows=$((SCALE * 100000))
+  # ON_ERROR_STOP: a failed seed must abort the run (set -e) instead of skewing results.
+  psql_as "$v" -v ON_ERROR_STOP=1 <<SQL
+DROP TABLE IF EXISTS siu_table;
+CREATE TABLE siu_table(a int PRIMARY KEY, b int, c int, d int, e text);
+CREATE INDEX siu_b ON siu_table(b);
+CREATE INDEX siu_c ON siu_table(c);
+CREATE INDEX siu_d ON siu_table(d);
+INSERT INTO siu_table
+  SELECT i, i, i, i, repeat('x', 20) FROM generate_series(1, $rows) AS i;
+VACUUM (FULL, ANALYZE) siu_table;
+CHECKPOINT;
+SQL
+}
+
+# seed_wide_table: (re)create the wide table with WIDE_COLS indexed columns.
+seed_wide_table() {
+  local v=$1
+  local coldefs="" insertcols="" insertvals="" idxlist=""
+  for i in $(seq 1 "$WIDE_COLS"); do
+    coldefs+=", c$i int"
+    insertcols+=", c$i"
+    insertvals+=", i"
+    idxlist+="CREATE INDEX wide_c$i ON wide_table(c$i); "
+  done
+  local wide_rows=$((SCALE * 1000))
+  psql_as "$v" <<SQL
+DROP TABLE IF EXISTS wide_table;
+CREATE TABLE wide_table(id int PRIMARY KEY $coldefs);
+$idxlist
+INSERT INTO wide_table(id $insertcols) SELECT i $insertvals FROM generate_series(1, $wide_rows) AS i;
+VACUUM (FULL, ANALYZE) wide_table;
+CHECKPOINT;
+SQL
+}
+
+# reset_state: restore a workload's target table to its seeded baseline.
+# Used between workloads so per-workload bloat/idx_size deltas are not
+# polluted by carryover from earlier workloads in the same variant run.
+# For pgbench_accounts we re-initialise via `pgbench -i`; for our
+# hand-rolled tables we drop + recreate + reseed.
+reset_state() {
+  local v=$1 table=$2
+  case "$table" in
+    pgbench_accounts)
+      # Re-initialise through the shared wrapper; output is appended to the init log.
+      pgbench_as "$v" -i -s "$SCALE" -q postgres >>"$LOGDIR/pgbench_init_$v.log" 2>&1
+      psql_as "$v" -c "CHECKPOINT" >/dev/null
+      ;;
+    siu_table)
+      seed_siu_table "$v"
+      ;;
+    wide_table)
+      seed_wide_table "$v"
+      ;;
+    *)
+      echo "reset_state: unknown table $table" >&2
+      return 1
+      ;;
+  esac
+  psql_as "$v" -c "SELECT pg_stat_reset_single_table_counters('$table'::regclass::oid)" >/dev/null
+}
+
+bloat_stats() {
+  # Prints "<table size in 8192-byte pages>,<total index bytes>" for table $2 on variant $1.
+  psql_as "$1" -Atc "SELECT pg_table_size('$2')/8192 || ',' || pg_indexes_size('$2')"
+}
+
+# siu_count: number of HOT-indexed updates observed on $table since its
+# pgstat counters were last reset.  Returns "0" on master (where the
+# counter column does not exist) so the CSV column stays numeric.
+siu_count() {
+  local v=$1 table=$2 val
+  # On master the column is missing and psql fails; "|| true" keeps that from tripping set -e.
+  val=$(psql_as "$v" -Atc \
+    "SELECT coalesce(n_tup_hot_indexed_upd, 0) FROM pg_stat_user_tables WHERE relname='$table'" 2>/dev/null) || true
+  [[ "$val" =~ ^[0-9]+$ ]] || val=0
+  echo "$val"
+}
+
+# per_index_sizes: emit "idx1=bytes;idx2=bytes;..." for the indexes on
+# $table, sorted by indexrelid.  Used by the wide_* workloads so we can
+# see per-column index growth rather than just the aggregate.  Returns
+# the literal "none" when $table has no indexes.
+per_index_sizes() {
+  local v=$1 table=$2 sizes
+  # One catalog query: join the table to its pg_index rows and aggregate
+  # "name=bytes" pairs in index-OID order.
+  sizes=$(psql_as "$v" -Atc "SELECT string_agg(
+           i.relname || '=' || pg_relation_size(i.oid)::text,
+           ';' ORDER BY i.oid)
+         FROM pg_class t
+         JOIN pg_index ix ON ix.indrelid = t.oid
+         JOIN pg_class i  ON i.oid = ix.indexrelid
+         WHERE t.relname = '$table'")
+  echo "${sizes:-none}"
+}
+
+sample_peak() {
+  # Sample CPU / RSS of the postmaster tree for $DURATION+5 seconds in a
+  # background subshell and print that subshell's PID.  The running peak is
+  # published to the given outfile as "peak_cpu_pct,peak_rss_mib" after every
+  # sample, so the file is usable even by a caller that cannot wait for the PID.
+  # Writes 'NA,NA' and starts no sampler when the postmaster PID is unknown.
+  local outfile=$1 v=$2
+  local leader
+  leader=$(postmaster_pid "$v")
+  [ -z "$leader" ] && { echo "NA,NA" > "$outfile"; return; }
+  local dur=$(( DURATION + 5 ))
+  (
+    local max_cpu=0
+    local max_rss=0
+    local t0=$(date +%s)
+    while :; do
+      # Children of the leader + the leader itself.
+      local pids
+      pids=$( (pgrep -P "$leader" 2>/dev/null; echo "$leader") | tr '\n' ' ')
+      local sample
+      sample=$(ps -o pcpu=,rss= -p $pids 2>/dev/null | \
+               awk '{cpu+=$1; rss+=$2} END{printf "%.1f %d\n", cpu+0, rss+0}')
+      local c r
+      read -r c r <<<"$sample"
+      if [ -n "${c:-}" ] && [ -n "${r:-}" ]; then
+        awk -v m="$max_cpu" -v c="$c" 'BEGIN{exit !(c>m)}' && max_cpu=$c
+        [ "$r" -gt "$max_rss" ] 2>/dev/null && max_rss=$r
+      fi
+      echo "$max_cpu,$(( max_rss / 1024 ))" > "$outfile.tmp" && mv -f "$outfile.tmp" "$outfile"
+      local now=$(date +%s)
+      [ $((now - t0)) -ge "$dur" ] && break
+      sleep 1
+    done
+  ) >/dev/null &   # detach stdout: a caller using $(...) must not block on the sampler
+  echo $!
+}
+
+run_one() {
+  local v=$1 workload=$2 script=$3 table=${4:-siu_table} extra_set=${5:-}
+  # Run $workload on variant $v for $DURATION seconds and append one CSV row to
+  # $OUT.  $script is the pgbench -f file (unused by simple_update), $table the
+  # relation whose counters and sizes are diffed, $extra_set the wide_* SET list.
+  local wal_start wal_end hot_start hot_end siu_start siu_end total_start total_end
+  local tps lat bloat_before bloat_after idx_before idx_after per_idx_before per_idx_after
+  read -r bloat_before idx_before <<<"$(bloat_stats "$v" "$table" | tr , ' ')"
+  per_idx_before=$(per_index_sizes "$v" "$table")
+
+  wal_start=$(psql_as "$v" -Atc "SELECT pg_current_wal_lsn()::text")
+  hot_start=$(psql_as "$v" -Atc "SELECT coalesce(n_tup_hot_upd,0) FROM pg_stat_user_tables WHERE relname='$table'")
+  siu_start=$(siu_count "$v" "$table")
+  total_start=$(psql_as "$v" -Atc "SELECT coalesce(n_tup_upd,0) FROM pg_stat_user_tables WHERE relname='$table'")
+
+  local out="$LOGDIR/${v}_${workload}.log" cpu_rss_file="$LOGDIR/${v}_${workload}.cpu"
+  # Start the sampler from this shell: inside $(...) it would hold the pipe open,
+  # blocking until it finished, and its PID could not be waited on.
+  sample_peak "$cpu_rss_file" "$v" >/dev/null || true
+  local sampler_pid=${!:-}
+  set +e
+  case "$workload" in
+    simple_update)
+      pgbench_as "$v" -N -c "$CLIENTS" -j "$THREADS" -T "$DURATION" \
+        -n postgres >"$out" 2>&1
+      ;;
+    wide_*)
+      # build the SET clause from extra_set which is "c1=:v,c2=:v,..."
+      pgbench_as "$v" -f <(sed "s/:wide_set_clause/$extra_set/" "$script") \
+        -c "$CLIENTS" -j "$THREADS" -T "$DURATION" \
+        -D "scale=$SCALE" -n postgres >"$out" 2>&1
+      ;;
+    read_indexscan)
+      # read-only; pass the row count so the script can pick random keys
+      pgbench_as "$v" -f "$script" -c "$CLIENTS" -j "$THREADS" -T "$DURATION" \
+        -D "rows=$((SCALE * 100000))" -n postgres >"$out" 2>&1
+      ;;
+    *)  # custom -f scripts see :scale = 1 unless it is passed explicitly
+      pgbench_as "$v" -f "$script" -c "$CLIENTS" -j "$THREADS" -T "$DURATION" \
+        -D "scale=$SCALE" -n postgres >"$out" 2>&1
+      ;;
+  esac
+  set -e
+
+  wait "$sampler_pid" 2>/dev/null || true
+  local cpu_rss
+  cpu_rss=$(cat "$cpu_rss_file" 2>/dev/null || echo "NA,NA")
+
+  tps=$(awk '/tps = /{print $3; exit}' "$out")
+  lat=$(awk '/latency average = /{print $4; exit}' "$out")
+  tps=${tps:-NA}
+  lat=${lat:-NA}
+
+  wal_end=$(psql_as "$v" -Atc "SELECT pg_current_wal_lsn()::text")
+  hot_end=$(psql_as "$v" -Atc "SELECT coalesce(n_tup_hot_upd,0) FROM pg_stat_user_tables WHERE relname='$table'")
+  siu_end=$(siu_count "$v" "$table")
+  total_end=$(psql_as "$v" -Atc "SELECT coalesce(n_tup_upd,0) FROM pg_stat_user_tables WHERE relname='$table'")
+
+  local wal_bytes
+  wal_bytes=$(psql_as "$v" -Atc "SELECT pg_wal_lsn_diff('$wal_end'::pg_lsn, '$wal_start'::pg_lsn)::bigint")
+
+  # Capture a WAL record-type histogram for this workload.  pg_waldump's
+  # --stats=record output is rich (~60 lines) so stash it in LOGDIR
+  # rather than trying to fold into the CSV.  Tolerate failures: if the
+  # segment containing wal_start has been recycled (rare with
+  # max_wal_size=4GB but possible under long chained runs), we emit a
+  # note and move on instead of aborting the whole run.
+  local wal_stats_file=$LOGDIR/${v}_${workload}.walstats
+  LD_LIBRARY_PATH="$(LD_of "$v")" "$(bin_of "$v")/pg_waldump" \
+    --stats=record -p "$BENCH/_data_$v/pg_wal" \
+    --start="$wal_start" --end="$wal_end" \
+    > "$wal_stats_file" 2> "${wal_stats_file}.err" \
+    || echo "pg_waldump unavailable for this range; see ${wal_stats_file}.err" > "$wal_stats_file"
+
+  read -r bloat_after idx_after <<<"$(bloat_stats "$v" "$table" | tr , ' ')"
+  per_idx_after=$(per_index_sizes "$v" "$table")
+
+  local hot=$((hot_end - hot_start))
+  local siu=$((siu_end - siu_start))
+  local tot=$((total_end - total_start))
+  local classic_hot=$((hot - siu))
+  local non_hot=$((tot - hot))
+
+  printf '%s,%s,%s,%s,%d,%d,%d,%d,%s,%s,%s,%s,%s,%s,%s,%s\n' \
+    "$v" "$workload" "$tps" "$lat" "$classic_hot" "$siu" "$non_hot" "$tot" \
+    "$wal_bytes" \
+    "$bloat_before" "$bloat_after" \
+    "$idx_before" "$idx_after" \
+    "$cpu_rss" "$per_idx_before" "$per_idx_after" >> "$OUT"
+  printf '  %-8s %-14s tps=%10s lat=%6s classic_hot=%7d hi=%7d non_hot=%7d tot=%-7d wal=%12s bloat=%s->%s idx=%s->%s cpu_rss=%s\n' \
+    "$v" "$workload" "$tps" "$lat" "$classic_hot" "$siu" "$non_hot" "$tot" "$wal_bytes" \
+    "$bloat_before" "$bloat_after" "$idx_before" "$idx_after" "$cpu_rss"
+}
+
+build_wide_set_clause() {
+  # emit e.g. "c1=:v,c2=:v,...,cN=:v" for first N cols.
+  local n=$1 i clauses=""
+  if [ "$n" -eq 0 ]; then
+    # Baseline: assign the key to itself, so the UPDATE is valid but changes no value.
+    echo "id=id"
+    return
+  fi
+  # C-style loop keeps i local and avoids forking seq.
+  for ((i = 1; i <= n; i++)); do
+    [ -n "$clauses" ] && clauses+=","
+    clauses+="c$i=:v"
+  done
+  echo "$clauses"
+}
+
+# Prefer the SQL files that sit next to this script; fall back to $BENCH/scripts.
+sqldir=$(dirname "$0"); [ -f "$sqldir/wide_update.sql" ] || sqldir=$BENCH/scripts
+for v in master tepid; do
+  echo "--- variant: $v"
+  stop_pg "$v" || true
+  start_pg "$v"
+  setup_schemas "$v"
+
+  run_one "$v" simple_update ''                    pgbench_accounts
+  reset_state "$v" siu_table
+  run_one "$v" hot_indexed_update    "$sqldir/hot_indexed_update.sql"  siu_table
+  reset_state "$v" siu_table
+  run_one "$v" hot_indexed_mixed     "$sqldir/hot_indexed_mixed.sql"   siu_table
+  reset_state "$v" siu_table
+  run_one "$v" read_indexscan        "$sqldir/read_indexscan.sql"      siu_table
+  for n in ${WIDE_STEPS//,/ }; do
+    reset_state "$v" wide_table
+    run_one "$v" "wide_${n}" "$sqldir/wide_update.sql" wide_table "$(build_wide_set_clause "$n")"
+  done
+
+  stop_pg "$v"
+done
+
+echo "=== results: $OUT"
+column -t -s, "$OUT" | head -50
diff --git a/src/test/benchmarks/siu/scripts/soak.sh b/src/test/benchmarks/siu/scripts/soak.sh
new file mode 100755
index 00000000000..6d127f1c012
--- /dev/null
+++ b/src/test/benchmarks/siu/scripts/soak.sh
@@ -0,0 +1,128 @@
+#!/usr/bin/env bash
+# tepid soak: run hot_indexed_update for $DURATION seconds on each variant, sampling
+# TPS / HOT-rate / WAL volume / table+index bloat every $SAMPLE seconds.
+# Emits a CSV with one sample row per tick per variant.
+set -euo pipefail
+
+BENCH=${BENCH:-/scratch/siu-bench}
+SCALE=${SCALE:-50}
+CLIENTS=${CLIENTS:-16}
+THREADS=${THREADS:-8}
+DURATION=${DURATION:-900}     # 15 minutes
+SAMPLE=${SAMPLE:-60}          # every 60 s
+PORT=${PORT:-57503}
+SHARED_BUFFERS=${SHARED_BUFFERS:-2GB}
+
+TS=$(date -u +%Y%m%dT%H%M%SZ)
+OUT=$BENCH/results/soak_$TS.csv
+LOGDIR=$BENCH/logs/soak_$TS
+mkdir -p "$LOGDIR" "$(dirname "$OUT")"   # results/ must exist before the header write below
+echo "variant,t_secs,tps_instant,hot_pct_instant,heap_pages,index_bytes,wal_bytes_since_start,n_dead_tup" > "$OUT"
+echo "=== soak $TS -> $OUT"
+
+bin_of()  { echo "$BENCH/$1/usr/local/pgsql/bin"; }
+LD_of()   { local b=$BENCH/$1/usr/local/pgsql; [ -d "$b/lib64" ] && echo "$b/lib64" || echo "$b/lib"; }
+
+psql_as() { local v=$1; shift; LD_LIBRARY_PATH="$(LD_of "$v")" "$(bin_of "$v")/psql" -h /tmp -p "$PORT" -U postgres -X "$@"; }
+pgbench_as() { local v=$1; shift; LD_LIBRARY_PATH="$(LD_of "$v")" "$(bin_of "$v")/pgbench" -h /tmp -p "$PORT" -U postgres "$@"; }
+
+start_pg() {
+  local v=$1 datadir=$BENCH/_data_$v
+  [ -d "$datadir" ] && find "$datadir" -mindepth 1 -delete && rmdir "$datadir"
+  mkdir -p "$datadir"
+  LD_LIBRARY_PATH="$(LD_of "$v")" "$(bin_of "$v")/initdb" -D "$datadir" -U postgres >"$LOGDIR/initdb_$v.log" 2>&1
+  cat >> "$datadir/postgresql.conf" <<EOF
+shared_buffers = $SHARED_BUFFERS
+work_mem = 32MB
+max_wal_size = 8GB
+synchronous_commit = on
+checkpoint_timeout = 10min
+wal_level = replica
+autovacuum = on
+autovacuum_naptime = 10s
+autovacuum_vacuum_threshold = 50
+autovacuum_vacuum_scale_factor = 0.1
+port = $PORT
+EOF
+  LD_LIBRARY_PATH="$(LD_of "$v")" "$(bin_of "$v")/pg_ctl" -D "$datadir" \
+    -o "-p $PORT" -l "$LOGDIR/pg_$v.log" start >/dev/null
+  sleep 2
+}
+
+stop_pg() {
+  local v=$1
+  LD_LIBRARY_PATH="$(LD_of "$v")" "$(bin_of "$v")/pg_ctl" -D "$BENCH/_data_$v" stop -m fast >/dev/null 2>&1 || true
+}
+
+setup() {
+  local v=$1 rows=$((SCALE * 100000))
+  psql_as "$v" -v ON_ERROR_STOP=1 <<SQL   # a failed seed aborts the soak (set -e)
+DROP TABLE IF EXISTS siu_table;
+CREATE TABLE siu_table(a int PRIMARY KEY, b int, c int, d int, e text);
+CREATE INDEX siu_b ON siu_table(b);
+CREATE INDEX siu_c ON siu_table(c);
+CREATE INDEX siu_d ON siu_table(d);
+INSERT INTO siu_table
+  SELECT i, i, i, i, repeat('x', 20) FROM generate_series(1, $rows) AS i;
+VACUUM (ANALYZE) siu_table;
+SQL
+}
+
+run_soak() {
+  local v=$1 wal0 hot0 tot0 sqldir
+  echo "--- soak $v for ${DURATION}s, sampling every ${SAMPLE}s"
+  stop_pg "$v" || true
+  start_pg "$v"
+  setup "$v"
+  wal0=$(psql_as "$v" -Atc "SELECT pg_current_wal_lsn()::text")
+  hot0=$(psql_as "$v" -Atc "SELECT coalesce(n_tup_hot_upd,0) FROM pg_stat_user_tables WHERE relname='siu_table'")
+  tot0=$(psql_as "$v" -Atc "SELECT coalesce(n_tup_upd,0)     FROM pg_stat_user_tables WHERE relname='siu_table'")
+  local prev_hot=$hot0 prev_tot=$tot0
+  # Prefer the SQL file next to this script; fall back to $BENCH/scripts.
+  sqldir=$(dirname "$0"); [ -f "$sqldir/hot_indexed_update.sql" ] || sqldir=$BENCH/scripts
+  # Drive pgbench in the background; sampler in foreground.  Custom -f scripts
+  # see :scale = 1 unless it is passed, so hand it over explicitly.
+  pgbench_as "$v" -f "$sqldir/hot_indexed_update.sql" \
+    -c "$CLIENTS" -j "$THREADS" -T "$DURATION" -D "scale=$SCALE" \
+    -P "$SAMPLE" -n postgres >"$LOGDIR/pgbench_$v.log" 2>&1 &
+  local pgb=$!
+
+  local t=0
+  while [ "$t" -lt "$DURATION" ]; do
+    sleep "$SAMPLE"
+    t=$((t + SAMPLE))
+    local now_hot now_tot wal_now wal_bytes heap_pages idx_bytes n_dead
+    now_hot=$(psql_as "$v" -Atc "SELECT coalesce(n_tup_hot_upd,0) FROM pg_stat_user_tables WHERE relname='siu_table'")
+    now_tot=$(psql_as "$v" -Atc "SELECT coalesce(n_tup_upd,0)     FROM pg_stat_user_tables WHERE relname='siu_table'")
+    wal_now=$(psql_as "$v" -Atc "SELECT pg_current_wal_lsn()::text")
+    wal_bytes=$(psql_as "$v" -Atc "SELECT pg_wal_lsn_diff('$wal_now'::pg_lsn, '$wal0'::pg_lsn)::bigint")
+    heap_pages=$(psql_as "$v" -Atc "SELECT pg_table_size('siu_table')/8192")
+    idx_bytes=$(psql_as "$v" -Atc "SELECT pg_indexes_size('siu_table')")
+    n_dead=$(psql_as "$v" -Atc "SELECT coalesce(n_dead_tup,0) FROM pg_stat_user_tables WHERE relname='siu_table'")
+
+    local d_hot=$((now_hot - prev_hot))
+    local d_tot=$((now_tot - prev_tot))
+    local tps_i hot_pct
+    if [ "$d_tot" -gt 0 ]; then
+      tps_i=$(awk -v d="$d_tot" -v s="$SAMPLE" 'BEGIN{printf "%.1f", d/s}')
+      hot_pct=$(awk -v h="$d_hot" -v t="$d_tot" 'BEGIN{printf "%.1f", 100*h/t}')
+    else
+      tps_i=0; hot_pct=0
+    fi
+    printf '%s,%d,%s,%s,%s,%s,%s,%s\n' "$v" "$t" "$tps_i" "$hot_pct" "$heap_pages" "$idx_bytes" "$wal_bytes" "$n_dead" >> "$OUT"
+    printf '  %-6s t=%-5d tps=%8s hot=%-5s%% heap_pgs=%-7s idx=%-12s wal=%-12s dead=%s\n' \
+      "$v" "$t" "$tps_i" "$hot_pct" "$heap_pages" "$idx_bytes" "$wal_bytes" "$n_dead"
+    prev_hot=$now_hot
+    prev_tot=$now_tot
+  done
+
+  wait "$pgb" 2>/dev/null || true
+  stop_pg "$v"
+}
+
+for v in master tepid; do
+  run_soak "$v"
+done
+
+echo "=== soak results: $OUT"
+column -t -s, "$OUT" | head -80
diff --git a/src/test/benchmarks/siu/scripts/wide_update.sql b/src/test/benchmarks/siu/scripts/wide_update.sql
new file mode 100644
index 00000000000..c2c2ff14ac4
--- /dev/null
+++ b/src/test/benchmarks/siu/scripts/wide_update.sql
@@ -0,0 +1,7 @@
+-- Wide-table workload.  The setup script creates a table with WIDE_COLS integer
+-- columns, each separately btree-indexed.  The workload UPDATEs a
+-- configurable number of those indexed columns per transaction
+-- (one run per entry of the WIDE_STEPS env var) on a random row.
+\set rid random(1, :scale * 1000)
+\set v random(1, 1000000000)
+UPDATE wide_table SET :wide_set_clause WHERE id = :rid;
-- 
2.50.1

