#!/usr/bin/env bash
set -euo pipefail
set -E   # errtrace: the ERR trap is inherited by functions, command substitutions and subshells

# Absolute path of the directory containing this script (used to locate helper scripts)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

###############################################################################
# run_snapbuild_purge_bench.sh - First-principles benchmark for SnapBuildPurgeOlderTxn
#
# HYPOTHESIS (H₁):
#   In-place compaction reduces time and CPU cycles for purging committed.xip,
#   with gains growing with xcnt size and fraction of removed XIDs.
#
# SUCCESS CRITERIA (a priori):
#   ≥25% lower mean purge time at xcnt ≥ 500k with 95% CI not crossing 0,
#   and measurable reduction in cache-misses and cycles/instruction.
#
# THREE-TIER MEASUREMENT:
#   1. Unit microbench (white-box): Isolated purge function with controlled inputs
#   2. Component bench (grey-box): Logical decoding with catalog churn, measure purge
#   3. End-to-end (black-box): Decoder throughput/latency under sustained load
#
# Usage:
#   run_snapbuild_purge_bench.sh [--clean] [--with-baseline] [--profile] [--cold-cache] [--trace] [<patch>]
#
# Env knobs:
#   WORKROOT=$HOME/pg_snapbuild_bench
#   CATALOG_CHURN_DURATION=30        # seconds each pgbench workload runs (tiers 2 and 3)
#   CATALOG_CHURN_CLIENTS=64         # concurrent pgbench clients (default: 500 for tier 2, 64 for tier 3)
#   REPS=30                          # repetitions per measurement point (tier 3 defaults to 10; use ≥30 for stats)
#   PROFILE_SECS=30                  # seconds for perf profiling
#   PERF_SUDO=sudo                   # or empty if perf doesn't require sudo
#   DO_COLD_CACHE=0|1                # drop OS cache between runs
#   MICROBENCH_SIZES="100,500,1000,2000,5000,10000"  # xcnt values (realistic committed.xip sizes)
#   MICROBENCH_KEEP_RATIOS="0.9,0.5,0.1,0.01"  # fraction of survivors
#   UNIT_BENCH_ENABLE=1              # enable unit microbench (requires extension)
#   COMPONENT_BENCH_ENABLE=1         # enable component bench (logical decoding)
#   E2E_BENCH_ENABLE=1               # enable end-to-end bench
#
# Outputs:
#   <root>/results/*.csv       - timing data with mean, median, p95, 95% CI
#   <root>/profile/*.perf.*    - perf stat counters (cycles, instructions, cache-misses)
#   <root>/analysis/           - plots and statistical summaries (from separate analysis tooling; not created by this script)
###############################################################################

# ------------------------- tiny logging helpers ------------------------------
# log: headline message on stdout, prefixed with a bold blue "==>".
log () {
  printf '\033[1;34m==>\033[0m %s\n' "$*"
}

# note: indented detail line on stdout.
note () {
  printf '     %s\n' "$*"
}

# warn: bold yellow "WARN:" message on stderr; execution continues.
warn () {
  printf '\033[1;33mWARN:\033[0m %s\n' "$*" >&2
}

# die: bold red "ERROR:" message on stderr, then exit with status 1.
die () {
  printf '\033[1;31mERROR:\033[0m %s\n' "$*" >&2
  exit 1
}

# ----------------------------- CLI flags -------------------------------------
# Switches that can only be turned on from the command line
CLEAN=0 WITH_BASELINE=0 DO_PROFILE=0

# Settings that may be pre-seeded from the environment; fill in the
# default only when the variable is unset or empty
: "${DO_COLD_CACHE:=0}"
: "${PATCH:=}"
: "${TRACE:=0}"

# Print the help text and exit 0.
#
# The help text is the comment banner near the top of this script: everything
# between the first two lines that start with a run of '#' characters, with
# the leading "# " removed. Finding the banner by its rule lines (rather than
# by fixed line numbers) keeps the shebang, the `set` lines and the rules
# themselves out of the output, and survives edits that move the banner.
usage () {
  awk '
    /^########/ { rules++; next }
    rules == 1  { sub(/^# ?/, ""); print }
    rules >= 2  { exit }
  ' "$0"
  exit 0
}

# Consume the positional parameters one at a time. Known switches set their
# flag; any other word starting with '-' is fatal; a bare word is taken as
# the patch path (a later bare word replaces an earlier one).
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)       usage ;;
    --clean)         CLEAN=1 ;;
    --with-baseline) WITH_BASELINE=1 ;;
    --profile)       DO_PROFILE=1 ;;
    --cold-cache)    DO_COLD_CACHE=1 ;;
    --trace)         TRACE=1 ;;
    -*)              die "Unknown flag: $1" ;;
    *)               PATCH="$1" ;;
  esac
  shift
done

# ------------------------------- sanity --------------------------------------
# A patch path, when given, must name an existing regular file
if [[ -n "$PATCH" ]] && [[ ! -f "$PATCH" ]]; then
  die "Patch file '$PATCH' not found"
fi

# Comparing against a baseline only makes sense when there is a patch
[[ $WITH_BASELINE -ne 1 || -n "$PATCH" ]] || die "--with-baseline requires a patch file"

# Whenever a command fails, report its exit status, location and text on stderr
trap 'st=$?; printf "\033[1;33mWARN:\033[0m Aborting (exit=%d) at %s:%d: %s\n" \
  "$st" "${BASH_SOURCE[0]}" "${LINENO}" "${BASH_COMMAND}" >&2' ERR

# Signal handler: stop every background job started by this shell (TERM
# first, KILL one second later for anything still alive), then exit.
#
# Arguments: $1 - name of the signal being handled, without the "SIG" prefix
#                 (default: INT)
# Exits with the conventional 128+signal status so callers can tell the
# signals apart: HUP 129, INT 130, QUIT 131, TERM 143. Unrecognised names
# fall back to 130.
cleanup_and_exit() {
  local sig="${1:-INT}"
  local status
  case "$sig" in
    HUP)  status=129 ;;
    QUIT) status=131 ;;
    TERM) status=143 ;;
    *)    status=130 ;;
  esac

  echo
  echo "Received $sig, stopping all child processes..."

  # Disable traps to prevent recursion
  trap - INT TERM

  # Kill all background jobs in this shell
  local pids
  pids=$(jobs -p 2>/dev/null || true)
  if [[ -n "$pids" ]]; then
    echo "$pids" | xargs -r kill -TERM 2>/dev/null || true
    sleep 1
    echo "$pids" | xargs -r kill -KILL 2>/dev/null || true
  fi

  exit "$status"
}

# Route SIGTERM and Ctrl-C through the graceful shutdown handler
trap 'cleanup_and_exit TERM' TERM
trap 'cleanup_and_exit INT' INT

# Hard requirements: abort early when a needed tool is not on PATH
for required_tool in git lsof awk bc; do
  command -v "$required_tool" >/dev/null || die "Need $required_tool"
done
unset required_tool

# perf is needed only when profiling was requested
if [[ $DO_PROFILE -eq 1 ]] && ! command -v perf >/dev/null; then
  die "Need perf (sudo apt install linux-tools-$(uname -r))"
fi

# --------------------------- run mode & paths --------------------------------
# Decide which trees get built and measured:
#   both         - --with-baseline was given
#   patched_only - only a patch was given
#   base_only    - neither
RUN_MODE="base_only"
if [[ $WITH_BASELINE -eq 1 ]]; then
  RUN_MODE="both"
elif [[ -n "$PATCH" ]]; then
  RUN_MODE="patched_only"
fi

# Derive a short, filesystem-safe tag from a patch path. The tag names the
# per-build work directory under WORKROOT.
#
# Arguments: $1 - patch path (may be empty or omitted)
# Outputs:   "vanilla" for an empty path; otherwise the basename without its
#            last extension, reduced to [[:alnum:]_-] and cut to 15 characters.
#            When nothing survives that reduction (e.g. ".patch", "!!!.diff")
#            the tag is "patched": an empty tag would make the work root
#            collapse onto WORKROOT itself.
tag_from_patch () {
  local p="${1:-}"
  if [[ -z "$p" ]]; then
    echo "vanilla"
    return 0
  fi

  local t
  t=$(basename -- "$p")
  t="${t%.*}"
  t="${t//[^[:alnum:]_-]/}"
  t="${t:0:15}"
  echo "${t:-patched}"
}

# Work-directory layout: one root per build under WORKROOT, named by its tag.
TAG_BASE="vanilla"
TAG_PATCHED="$(tag_from_patch "${PATCH:-}")"

WORKROOT="${WORKROOT:-$HOME/pg_snapbuild_bench}"
ROOT_BASE="$WORKROOT/$TAG_BASE"
ROOT_PATCHED="$WORKROOT/$TAG_PATCHED"

# Whenever a patch is involved (modes "both" and "patched_only") its root
# must differ from the baseline root. Otherwise a patch whose tag comes out
# as "vanilla" would reuse — or, with --clean, wipe — the baseline tree and
# report baseline numbers as patched ones.
if [[ "$RUN_MODE" != "base_only" && "$ROOT_BASE" == "$ROOT_PATCHED" ]]; then
  die "Baseline and patched work roots would be the same. Provide a real patch file."
fi

# Create the log directory of every root this run will use
if [[ "$RUN_MODE" != "patched_only" ]]; then mkdir -p "$ROOT_BASE/logs"; fi
if [[ "$RUN_MODE" != "base_only"    ]]; then mkdir -p "$ROOT_PATCHED/logs"; fi

# ------------------------------ tracing/xtrace --------------------------------
# When TRACE is 1, turn on `set -x` and send the trace to a per-process log
# file under $WORKROOT/logs. Each trace line is prefixed with a timestamp,
# file name, line number and function name. Does nothing otherwise.
# Sets the global TRACE_FILE to the log path.
enable_xtrace () {
  [[ "${TRACE:-0}" -eq 1 ]] || return 0

  local log_dir="$WORKROOT/logs"
  mkdir -p "$log_dir"
  TRACE_FILE="${log_dir}/trace.$$.log"
  note "Enabling command trace to ${TRACE_FILE}"

  # Open the log on a shell-allocated descriptor and point xtrace at it
  exec {__trace_fd}>"$TRACE_FILE"
  BASH_XTRACEFD=$__trace_fd
  PS4='+ [${EPOCHREALTIME}] ${BASH_SOURCE##*/}:${LINENO}:${FUNCNAME[0]:-main}: '
  set -x
}

enable_xtrace

# ------------------------------- helpers -------------------------------------
# Print the first TCP port, at or above a starting point, on which lsof
# reports no listener.
#
# Arguments: $1 - first port to try (default: 5432)
# Outputs:   the chosen port number on stdout
# Exits (via die) when no port up to 60000 is free. When this function is
# called inside $(...), that exit only ends the substitution's subshell, so
# callers must check the substitution's status.
pick_ports () {
  local start="${1:-5432}"
  local p   # keep the loop counter out of the caller's scope
  for (( p = start; p <= 60000; p++ )); do
    if ! lsof -iTCP:"$p" -sTCP:LISTEN >/dev/null 2>&1; then
      echo "$p"
      return 0
    fi
  done
  die "couldn't find free port"
}

# Print the path of a program inside a build root's install tree.
# Arguments: $1 - build root, $2 - program name (psql, pg_ctl, ...)
pg () {
  printf '%s/pg/bin/%s\n' "$1" "$2"
}

# Stop the cluster, then write 3 to /proc/sys/vm/drop_caches via sudo.
# A failed write is only warned about; a missing knob is silently skipped.
# The server is left stopped.
#
# Arguments: $1 - build root whose cluster is stopped first
drop_os_caches () {
  local build_root="$1"
  local -r knob=/proc/sys/vm/drop_caches

  note "Stopping PostgreSQL before cache drop"
  stop_cluster "$build_root"

  if [[ -f "$knob" ]]; then
    note "Dropping OS caches"
    sync
    if printf '3\n' | sudo tee "$knob" >/dev/null 2>&1; then
      note "OS caches dropped"
    else
      warn "Failed to drop caches (sudo required)"
    fi
  fi

  # Brief pause before returning to the caller
  sleep 2
}

# ------------------------------- build ---------------------------------------
# Build and install PostgreSQL (optionally patched) into "$ROOT/pg".
#
# Arguments: $1 - build root; sources go to $ROOT/pgsrc, logs to $ROOT/logs
#            $2 - optional patch file, applied with `git apply`
# Globals:   CLEAN (read)      - 1 removes $ROOT before building
#            BASE_REF (read)   - branch/tag to clone (default REL_17_STABLE)
#            SCRIPT_DIR (read) - where install_microbench_extension.sh is looked up
#            PATH, PG_CONFIG   - exported when the microbench extension is built
# The whole build is skipped when $ROOT/pg/bin/initdb is already executable.
build_pg () {
  local ROOT="$1" PATCH_FILE="${2:-}"
  local LOG="$ROOT/logs"
  local BASE_REF="${BASE_REF:-REL_17_STABLE}"

  if [[ $CLEAN -eq 1 ]]; then rm -rf "$ROOT"; fi
  mkdir -p "$ROOT" "$LOG"

  # An executable initdb is taken as proof of a finished build
  if [[ ! -x "$(pg "$ROOT" initdb)" ]]; then
    log "Building PostgreSQL at $ROOT"
    if [[ ! -d "$ROOT/pgsrc" ]]; then
      note "Cloning postgres sources (ref: $BASE_REF)…"
      git clone --depth 1 --branch "$BASE_REF" https://github.com/postgres/postgres "$ROOT/pgsrc" >/dev/null
    fi
    pushd "$ROOT/pgsrc" >/dev/null
      # Discard local modifications and untracked files (e.g. from an earlier patch)
      git reset --hard HEAD >/dev/null && git clean -fd >/dev/null
      
      # Pin to specific base commit for reproducibility
      if [[ ! -f "$ROOT/.base_commit" ]]; then
        git rev-parse HEAD > "$ROOT/.base_commit"
        note "Base commit: $(cat "$ROOT/.base_commit")"
      else
        note "Using existing base commit: $(cat "$ROOT/.base_commit")"
        # Best effort: a failed checkout is ignored
        git checkout -f "$(cat "$ROOT/.base_commit")" 2>/dev/null || true
      fi
      
      if [[ -n "$PATCH_FILE" ]]; then
        log "Applying patch: $PATCH_FILE"
        # Check if patch applies cleanly first
        if ! git apply --check "$PATCH_FILE" 2>"$LOG/patch_check.log"; then
          die "Patch does not apply cleanly. Check $LOG/patch_check.log"
        fi
        git apply "$PATCH_FILE"
      fi
      note "Running ./configure"
      # Optimised build that keeps debug info and frame pointers
      CFLAGS='-O2 -ggdb3 -fno-omit-frame-pointer' \
      CXXFLAGS='-O2 -ggdb3 -fno-omit-frame-pointer' \
      LDFLAGS='-Wl,--export-dynamic' \
        ./configure --prefix="$ROOT/pg" >"$LOG/configure.log" 2>&1
      note "Running make install"
      make -s -j"$(nproc)" install >"$LOG/make.log" 2>&1
      # Failures of these two contrib installs are ignored
      make -s -C contrib/test_decoding install >>"$LOG/make.log" 2>&1 || true
      make -s -C contrib/pgbench install >>"$LOG/make.log" 2>&1 || true
      
      # Install Tier 1 micro-benchmark extension if available
      if [[ -f "$SCRIPT_DIR/install_microbench_extension.sh" ]]; then
        note "Installing snapbuild_bench extension (Tier 1 microbench)"
        if bash "$SCRIPT_DIR/install_microbench_extension.sh" "$ROOT/pgsrc" >>"$LOG/extension_install.log" 2>&1; then
          # Build extension using the pg_config from our PostgreSQL build
          export PATH="$ROOT/pg/bin:$PATH"
          export PG_CONFIG="$ROOT/pg/bin/pg_config"
          if make -s -C contrib/snapbuild_bench install >>"$LOG/make.log" 2>&1; then
            note "snapbuild_bench extension installed successfully"
          else
            warn "snapbuild_bench extension build failed (Tier 1 will be skipped)"
            # Show the relevant part of the build log on stderr
            cat "$LOG/make.log" | grep -A5 "snapbuild_bench" >&2 || true
          fi
        else
          warn "snapbuild_bench extension source copy failed (Tier 1 will be skipped)"
        fi
      fi
    popd >/dev/null
  else
    log "Reusing build at $ROOT"
  fi
}

# ----------------------------- cluster lifecycle ------------------------------
# Create a fresh data directory under the build root, append the benchmark
# settings to its postgresql.conf, and start the server (waiting for startup).
# Any existing data directory is deleted first.
#
# Arguments: $1 - build root, $2 - TCP port the server listens on
init_cluster () {
  local build_root="$1" port="$2"
  local log_dir="$build_root/logs"
  local data_dir="$build_root/data"
  local conf="$data_dir/postgresql.conf"

  rm -rf "$data_dir"
  "$(pg "$build_root" initdb)" -D "$data_dir" --no-locale >"$log_dir/initdb.log" 2>&1

  cat <<EOF >>"$conf"
port = $port
listen_addresses = '127.0.0.1'
shared_buffers = '512MB'
wal_level = logical
max_replication_slots = 10
max_wal_senders = 10
log_min_messages = warning
# High connection count for concurrent workload
max_connections = 600
# Increase catalog churn visibility
autovacuum = off
checkpoint_timeout = 15min
max_wal_size = 4GB
# Help handle high concurrency
shared_preload_libraries = ''
EOF

  "$(pg "$build_root" pg_ctl)" -D "$data_dir" -l "$log_dir/server.log" start -w
}

# Fast-stop the server of a build root. Best effort: all output is discarded
# and the function returns 0 even when pg_ctl fails.
#
# Arguments: $1 - build root
stop_cluster () {
  local build_root="$1"
  local ctl
  ctl="$(pg "$build_root" pg_ctl)"
  if ! "$ctl" -D "$build_root/data" -m fast stop >/dev/null 2>&1; then
    :   # failure deliberately ignored
  fi
}

# Fast-restart the server of a build root and wait for it to come back.
# Returns pg_ctl's exit status.
#
# Arguments: $1 - build root
restart_cluster () {
  local build_root="$1"
  note "Restarting PostgreSQL"
  "$(pg "$build_root" pg_ctl)" -D "$build_root/data" -m fast restart -w \
    -l "$build_root/logs/server.log"
}

# ----------------------- Unit Microbench (Tier 1) ----------------------------
# Requires a C extension (contrib/snapbuild_bench) that exposes both the old and new purge algorithms.
# This script does not generate the extension: build_pg installs it only when
# install_microbench_extension.sh sits next to this script; otherwise tier 1 is skipped with a warning.

# Check whether the tier-1 microbench extension sources are present in the
# build tree. Despite its name this function creates nothing: it only reports.
#
# Arguments: $1 - build root
# Returns:   0 when $1/pgsrc/contrib/snapbuild_bench exists; 1 otherwise,
#            after warning how to provide the extension. The advertised
#            signature matches the bench_purge(...) call issued by the unit
#            bench: method, xcnt, keep ratio, repetitions, distribution.
create_unit_bench_extension () {
  local ROOT="$1"
  local EXT_DIR="$ROOT/pgsrc/contrib/snapbuild_bench"

  if [[ -d "$EXT_DIR" ]]; then
    note "Unit bench extension already exists"
    return 0
  fi

  warn "Unit bench extension not found. Skipping tier 1 (unit microbench)."
  warn "To enable: implement extension at $EXT_DIR with function:"
  warn "  - bench_purge(method text, xcnt int, keep_ratio float, reps int, distribution text) -> record"
  warn "    (method: workspace|inplace; distribution: scattered|contiguous)"
  return 1
}

# Tier 1: call the extension's bench_purge() once per combination of method,
# xcnt, keep ratio and distribution, appending each result row to a CSV.
# Skips quietly (returns 0) when the extension is not available.
#
# Arguments: $1 - build root, $2 - server port, $3 - run name (file prefix)
# Globals:   MICROBENCH_SIZES, MICROBENCH_KEEP_RATIOS (read, comma-separated),
#            REPS (read, default 30)
# Outputs:   $ROOT/results/unit/${NAME}_unit.csv
run_unit_bench () {
  local ROOT="$1" PORT="$2" NAME="$3"
  local psql
  psql="$(pg "$ROOT" psql)"
  local OUT_DIR="$ROOT/results/unit"
  mkdir -p "$OUT_DIR"
  local csv="$OUT_DIR/${NAME}_unit.csv"

  log "[$NAME] Unit microbench (tier 1) - Benchmarking actual SnapBuildPurgeOlderTxn logic"

  if ! create_unit_bench_extension "$ROOT"; then
    note "Skipping unit bench (extension not available)"
    return 0
  fi

  # Create extension
  "$psql" -h 127.0.0.1 -p "$PORT" -d postgres -c "CREATE EXTENSION IF NOT EXISTS snapbuild_bench;" 2>/dev/null || {
    warn "Extension snapbuild_bench not available, skipping unit bench"
    return 0
  }

  local SIZES="${MICROBENCH_SIZES:-100,500,1000,2000,5000,10000}"
  local KEEP_RATIOS="${MICROBENCH_KEEP_RATIOS:-0.9,0.5,0.1,0.01}"
  local REPS="${REPS:-30}"
  # Everything the loops below assign is local, so nothing leaks to the caller
  local -a size_arr ratio_arr
  local -a dist_arr=(scattered contiguous)
  local method xcnt keep dist

  # CSV header
  echo "method,xcnt,keep_ratio,distribution,reps,mean_ns,median_ns,p95_ns,ci_lower_ns,ci_upper_ns,survivors,bytes_moved" \
    > "$csv"

  IFS=',' read -ra size_arr <<< "$SIZES"
  IFS=',' read -ra ratio_arr <<< "$KEEP_RATIOS"

  for method in workspace inplace; do
    for xcnt in "${size_arr[@]}"; do
      for keep in "${ratio_arr[@]}"; do
        for dist in "${dist_arr[@]}"; do
          note "  method=$method xcnt=$xcnt keep_ratio=$keep dist=$dist"

          # -X: do not read ~/.psqlrc, whose settings (e.g. \timing) would
          # add stray lines to the CSV. A failed call is only warned about.
          "$psql" -X -h 127.0.0.1 -p "$PORT" -d postgres -t -A -F',' \
            -c "SELECT * FROM bench_purge('$method', $xcnt, $keep, $REPS, '$dist');" \
            >> "$csv" 2>/dev/null || {
              warn "bench_purge failed for method=$method xcnt=$xcnt keep=$keep dist=$dist"
            }
        done
      done
    done
  done

  log "Unit bench results: $csv"
  note "Benchmarked both OLD (workspace+memcpy) and NEW (inplace) purge methods"
}

# -------------------- Component Bench (Tier 2) -------------------------------
# Drive logical decoding with catalog churn to inflate committed.xip,
# then measure purge function performance.

# Write a pgbench script of small DDL transactions (create and drop a table
# plus an index) to $1/catalog_churn.sql.
#
# Arguments: $1 - build root (directory that receives the script)
#
# The index is named churn_idx_:id rather than churn_:id_idx: pgbench variable
# names may contain underscores, so ":id_idx" would be read as a reference to
# an undefined variable "id_idx" instead of ":id" followed by "_idx".
create_catalog_churn_workload () {
  local ROOT="$1"
  # Use pgbench-style script with many small transactions
  # Each transaction creates/drops tables → commits → inflates committed.xip
  cat >"$ROOT/catalog_churn.sql" <<'SQL'
-- Each iteration = one small transaction with catalog DDL
-- This creates frequent commits that inflate committed.xip in logical decoding
\set id random(1, 1000000000)
BEGIN;
CREATE TABLE IF NOT EXISTS churn_:id (i int);
CREATE INDEX IF NOT EXISTS churn_idx_:id ON churn_:id (i);
DROP INDEX IF EXISTS churn_idx_:id;
DROP TABLE IF EXISTS churn_:id;
COMMIT;
SQL
}

# Write the tier-2 pgbench script to $1/concurrent_txn.sql. Each transaction
# upserts one bench_data row and then sleeps for 0.05 + random()*0.15 seconds
# before committing, so that many transactions are open at the same time.
#
# Arguments: $1 - build root (directory that receives the script)
create_concurrent_txn_workload () {
  local script="$1/concurrent_txn.sql"
  cat <<'SQL' >"$script"
-- Create many concurrent transactions to stress committed.xip purging
\set id random(1, 10000000)
BEGIN;
-- Do some work
INSERT INTO bench_data (id, data) 
VALUES (:id, repeat('x', 100))
ON CONFLICT (id) DO UPDATE SET data = repeat('y', 100);
-- Keep transaction open briefly to create concurrency
-- Multiple clients doing this = hundreds of in-flight XIDs
SELECT pg_sleep(0.05 + random() * 0.15);
COMMIT;
SQL
}

# (Re)create a logical replication slot that uses the test_decoding plugin.
# A slot of the same name is dropped first; errors of that drop are ignored.
# Returns the status of the create call.
#
# Arguments: $1 - build root, $2 - server port, $3 - slot name
setup_logical_slot () {
  local build_root="$1" port="$2" slot="$3"
  local psql_bin="$(pg "$build_root" psql)"
  local -a conn=(-h 127.0.0.1 -p "$port" -d postgres)

  # Remove a leftover slot, if any
  "$psql_bin" "${conn[@]}" -Atq \
    -c "SELECT pg_drop_replication_slot('$slot') FROM pg_replication_slots WHERE slot_name='$slot';" \
    2>/dev/null || true

  # Create the slot; the returned row is discarded
  "$psql_bin" "${conn[@]}" \
    -c "SELECT pg_create_logical_replication_slot('$slot', 'test_decoding');" >/dev/null
}

# Tier 2: stream logical decoding output from a slot while a high-concurrency
# write workload runs and, when profiling is enabled, collect perf counters
# from the decoding walsender backend.
#
# Arguments: $1 - build root, $2 - server port, $3 - run name (file prefix)
# Globals:   DO_PROFILE (read); CATALOG_CHURN_DURATION (default 30),
#            CATALOG_CHURN_CLIENTS (default 500), PERF_SUDO (default "sudo")
# Outputs:   $ROOT/results/component/${NAME}_decoded.log and ${NAME}_pgbench.out;
#            $ROOT/profile/${NAME}_component.perf.stat when profiling
run_component_bench () {
  local ROOT="$1" PORT="$2" NAME="$3"
  local psql pgbench pg_recvlogical
  psql="$(pg "$ROOT" psql)"
  pgbench="$(pg "$ROOT" pgbench)"
  pg_recvlogical="$(pg "$ROOT" pg_recvlogical)"
  local OUT_DIR="$ROOT/results/component"; mkdir -p "$OUT_DIR"
  local PROF_DIR="$ROOT/profile"; mkdir -p "$PROF_DIR"
  local c pid   # loop variables of the psql fallback, kept out of global scope

  log "[$NAME] Component bench (tier 2) - logical decoding with high concurrency"

  # Create table for concurrent transaction workload
  "$psql" -h 127.0.0.1 -p "$PORT" -d postgres -v ON_ERROR_STOP=1 <<'SQL'
DROP TABLE IF EXISTS bench_data;
CREATE TABLE bench_data (id int PRIMARY KEY, data text);
-- Pre-populate to avoid excessive table growth
INSERT INTO bench_data SELECT i, repeat('x', 100) FROM generate_series(1, 100000) i;
SQL

  # Raise server log verbosity for the duration of the bench (best effort).
  # ALTER SYSTEM cannot run inside a transaction block, and psql sends a
  # multi-statement -c string as one implicit transaction, so the setting and
  # the reload are passed as two separate -c options.
  "$psql" -h 127.0.0.1 -p "$PORT" -d postgres \
    -c "ALTER SYSTEM SET log_min_messages = 'log'" \
    -c "SELECT pg_reload_conf()" >/dev/null 2>&1 || true

  create_concurrent_txn_workload "$ROOT"
  setup_logical_slot "$ROOT" "$PORT" "bench_slot"

  # CRITICAL: High client count to create many in-flight XIDs
  local DURATION="${CATALOG_CHURN_DURATION:-30}"
  local CLIENTS="${CATALOG_CHURN_CLIENTS:-500}"  # Much higher to stress committed.xip
  local PERF_SUDO="${PERF_SUDO:-sudo}"

  note "Using HIGH CONCURRENCY: $CLIENTS clients to inflate committed.xip"

  # Start logical decoding consumer in background
  note "Starting logical decoding consumer"
  "$pg_recvlogical" -h 127.0.0.1 -p "$PORT" -d postgres \
    -S bench_slot --start -f - >"$OUT_DIR/${NAME}_decoded.log" 2>&1 &
  local DECODER_PID=$!
  sleep 2

  # Find the backend PID for the decoder. Poll for up to ~5 seconds; a failed
  # query just means "try again".
  local BACKEND_PID=""
  for _ in {1..50}; do
    BACKEND_PID=$("$psql" -X -h 127.0.0.1 -p "$PORT" -d postgres -Atq \
      -c "SELECT pid FROM pg_stat_activity WHERE backend_type='walsender' ORDER BY backend_start DESC LIMIT 1;" \
      || true)
    [[ -n "$BACKEND_PID" && -d "/proc/$BACKEND_PID" ]] && break
    sleep 0.1
  done

  if [[ -z "$BACKEND_PID" || ! -d "/proc/$BACKEND_PID" ]]; then
    warn "Couldn't find decoder backend PID"
    BACKEND_PID=""   # never hand a stale PID to perf below
  else
    note "Decoder backend PID: $BACKEND_PID"
  fi

  # Start perf profiling if enabled
  local PERF_PID=""
  if [[ $DO_PROFILE -eq 1 && -n "$BACKEND_PID" ]]; then
    note "Starting perf stat on decoder backend"
    # $PERF_SUDO is deliberately unquoted: when empty it must vanish
    $PERF_SUDO perf stat -e cycles,instructions,branches,branch-misses \
      -e L1-dcache-load-misses,LLC-load-misses,cache-misses \
      -p "$BACKEND_PID" -o "$PROF_DIR/${NAME}_component.perf.stat" \
      sleep "$DURATION" &
    PERF_PID=$!
  fi

  # Drive high-concurrency workload to inflate committed.xip
  note "Starting high-concurrency workload (pgbench: $CLIENTS clients, ${DURATION}s)"
  note "This creates hundreds of in-flight XIDs to stress committed.xip purging"
  if [[ -x "$pgbench" ]]; then
    "$pgbench" -h 127.0.0.1 -p "$PORT" -c "$CLIENTS" -j "$CLIENTS" \
      -T "$DURATION" -f "$ROOT/concurrent_txn.sql" postgres \
      >"$OUT_DIR/${NAME}_pgbench.out" 2>&1 || true
  else
    warn "pgbench not found, falling back to parallel psql (less effective)"
    # Fallback: Use multiple parallel psql sessions
    local pids=()
    for ((c=1; c<=CLIENTS; c++)); do
      (
        local end=$((SECONDS + DURATION))
        while (( SECONDS < end )); do
          "$psql" -h 127.0.0.1 -p "$PORT" -d postgres -f "$ROOT/concurrent_txn.sql" >/dev/null 2>&1 || true
        done
      ) &
      pids+=($!)
    done

    # Wait for workload to complete
    for pid in "${pids[@]}"; do
      wait "$pid" 2>/dev/null || true
    done
  fi

  # Wait for perf if running
  if [[ -n "$PERF_PID" ]]; then
    wait "$PERF_PID" 2>/dev/null || true
  fi

  # Stop decoder
  if [[ -n "$DECODER_PID" ]]; then
    kill -TERM "$DECODER_PID" 2>/dev/null || true
  fi
  wait "$DECODER_PID" 2>/dev/null || true

  # Extract metrics from perf stat
  if [[ -f "$PROF_DIR/${NAME}_component.perf.stat" ]]; then
    note "Perf results: $PROF_DIR/${NAME}_component.perf.stat"
    grep -E 'cycles|instructions|cache-misses' "$PROF_DIR/${NAME}_component.perf.stat" || true
  fi

  # Restore log level (same two-option form as above)
  "$psql" -h 127.0.0.1 -p "$PORT" -d postgres \
    -c "ALTER SYSTEM SET log_min_messages = 'warning'" \
    -c "SELECT pg_reload_conf()" >/dev/null 2>&1 || true

  # Drop slot
  "$psql" -h 127.0.0.1 -p "$PORT" -d postgres -Atq \
    -c "SELECT pg_drop_replication_slot('bench_slot');" 2>/dev/null || true

  log "Component bench complete"
}

# -------------------- End-to-End Bench (Tier 3) ------------------------------
# Measure decoder throughput (decoded transactions per second) under a sustained DML upsert workload.

# Write the tier-3 pgbench script to $1/dml_workload.sql: one upsert into
# bench_data per transaction, keyed by a random id between 1 and 1000000.
#
# Arguments: $1 - build root (directory that receives the script)
create_dml_workload () {
  local script="$1/dml_workload.sql"
  cat <<'SQL' >"$script"
-- Generate actual DML changes that logical decoding can consume
-- Mix of INSERTs and UPDATEs to stress the decoder
\set id random(1, 1000000)
BEGIN;
INSERT INTO bench_data (id, data) VALUES (:id, repeat('x', 100)) ON CONFLICT (id) DO UPDATE SET data = repeat('y', 100);
COMMIT;
SQL
}

# Tier 3: for each repetition, stream logical decoding output from a slot
# while pgbench drives an upsert workload, then record how many transactions
# were decoded and the resulting throughput.
#
# Arguments: $1 - build root, $2 - server port, $3 - run name (file prefix)
# Globals:   CATALOG_CHURN_DURATION (default 30), CATALOG_CHURN_CLIENTS
#            (default 64), REPS (default 10)
# Outputs:   $ROOT/results/e2e/${NAME}_e2e.csv (one row per repetition), plus
#            the decoder log and pgbench output of every repetition
run_e2e_bench () {
  local ROOT="$1" PORT="$2" NAME="$3"
  local psql pgbench pg_recvlogical
  psql="$(pg "$ROOT" psql)"
  pgbench="$(pg "$ROOT" pgbench)"
  pg_recvlogical="$(pg "$ROOT" pg_recvlogical)"
  local OUT_DIR="$ROOT/results/e2e"; mkdir -p "$OUT_DIR"
  local csv="$OUT_DIR/${NAME}_e2e.csv"

  log "[$NAME] End-to-end bench (tier 3) - decoder throughput"

  # Create table for DML workload (generates decodable changes)
  "$psql" -h 127.0.0.1 -p "$PORT" -d postgres -v ON_ERROR_STOP=1 <<'SQL'
DROP TABLE IF EXISTS bench_data;
CREATE TABLE bench_data (id int PRIMARY KEY, data text);
SQL

  create_dml_workload "$ROOT"
  setup_logical_slot "$ROOT" "$PORT" "e2e_slot"

  local DURATION="${CATALOG_CHURN_DURATION:-30}"
  local CLIENTS="${CATALOG_CHURN_CLIENTS:-64}"
  local REPS="${REPS:-10}"
  # Per-repetition state; declared here so nothing leaks to the caller
  local rep c pid DECODER_PID decoded_log
  local start_time end_time elapsed changes throughput
  local -a pids

  echo "rep,duration_s,changes_decoded,throughput_changes_per_sec" > "$csv"

  for (( rep = 1; rep <= REPS; rep++ )); do
    note "E2E rep $rep/$REPS"
    decoded_log="$OUT_DIR/${NAME}_e2e_${rep}.log"

    # Start decoder
    start_time=$(date +%s)
    "$pg_recvlogical" -h 127.0.0.1 -p "$PORT" -d postgres \
      -S e2e_slot --start -f - >"$decoded_log" 2>&1 &
    DECODER_PID=$!
    sleep 2  # Give decoder time to start

    # Drive DML workload using pgbench (generates actual decodable changes)
    if [[ -x "$pgbench" ]]; then
      "$pgbench" -h 127.0.0.1 -p "$PORT" -c "$CLIENTS" -j "$CLIENTS" \
        -T "$DURATION" -f "$ROOT/dml_workload.sql" postgres \
        >"$OUT_DIR/${NAME}_e2e_pgbench_${rep}.out" 2>&1 || true
    else
      # Fallback to parallel psql
      pids=()
      for ((c=1; c<=CLIENTS; c++)); do
        (
          local end=$((SECONDS + DURATION))
          while (( SECONDS < end )); do
            "$psql" -h 127.0.0.1 -p "$PORT" -d postgres -f "$ROOT/dml_workload.sql" >/dev/null 2>&1 || true
          done
        ) &
        pids+=($!)
      done

      for pid in "${pids[@]}"; do
        wait "$pid" 2>/dev/null || true
      done
    fi

    # Wait a moment for decoder to catch up
    sleep 2

    end_time=$(date +%s)
    elapsed=$((end_time - start_time))

    # Stop decoder gracefully
    if [[ -n "$DECODER_PID" ]]; then
      kill -TERM "$DECODER_PID" 2>/dev/null || true
    fi
    wait "$DECODER_PID" 2>/dev/null || true

    # Count decoded transactions (BEGIN lines in test_decoding output).
    # grep -c prints "0" AND exits 1 when nothing matches, so the fallback
    # must not print a second zero: `|| true` only neutralises the status,
    # and the default covers a missing or unreadable log (empty output).
    changes=$(grep -c "^BEGIN " "$decoded_log" 2>/dev/null || true)
    changes=${changes:-0}

    # Throughput in transactions per second; values are passed with -v
    # rather than spliced into the awk program text
    throughput="0.00"
    if [[ $changes -gt 0 && $elapsed -gt 0 ]]; then
      throughput=$(awk -v n="$changes" -v t="$elapsed" 'BEGIN { printf "%.2f", n / t }')
    fi

    echo "$rep,$elapsed,$changes,$throughput" >> "$csv"

    # Reset slot for next rep
    "$psql" -h 127.0.0.1 -p "$PORT" -d postgres -Atq \
      -c "SELECT pg_replication_slot_advance('e2e_slot', pg_current_wal_lsn());" >/dev/null 2>&1 || true
  done

  # Cleanup
  "$psql" -h 127.0.0.1 -p "$PORT" -d postgres -Atq \
    -c "SELECT pg_drop_replication_slot('e2e_slot');" 2>/dev/null || true

  log "E2E results: $csv"
}

# --------------------------------- runner ------------------------------------
# Run every enabled benchmark tier against one build: pick a free port,
# initialise and start a fresh cluster, run tiers 1-3, stop the cluster.
#
# Arguments: $1 - build root, $2 - run name ("base" or "patched")
# Globals:   PORT (read, optional) - first port to try (default 5432)
#            DO_COLD_CACHE (read)  - 1 drops OS caches and restarts the
#                                    server before each tier
#            UNIT_BENCH_ENABLE, COMPONENT_BENCH_ENABLE, E2E_BENCH_ENABLE
#            (read, default 1)
run_suite () {
  local ROOT="$1" NAME="$2"

  # Read the caller's PORT before shadowing it with the local of the same
  # name, and assign separately from `local` so a failing port search is
  # noticed: pick_ports reports failure by exiting, which inside $(...) only
  # ends the substitution's subshell.
  local start_port="${PORT:-5432}"
  local PORT
  PORT=$(pick_ports "$start_port") || die "[$NAME] Could not find a free TCP port"
  [[ -n "$PORT" ]] || die "[$NAME] Could not find a free TCP port"
  log "[$NAME] Using port $PORT"

  init_cluster "$ROOT" "$PORT"

  local UNIT_ENABLE="${UNIT_BENCH_ENABLE:-1}"
  local COMPONENT_ENABLE="${COMPONENT_BENCH_ENABLE:-1}"
  local E2E_ENABLE="${E2E_BENCH_ENABLE:-1}"

  # Tier 1: Unit microbench (optional, requires extension)
  if [[ $UNIT_ENABLE -eq 1 ]]; then
    [[ "$DO_COLD_CACHE" -eq 1 ]] && drop_os_caches "$ROOT" && restart_cluster "$ROOT"
    run_unit_bench "$ROOT" "$PORT" "$NAME"
  fi

  # Tier 2: Component bench (logical decoding with instrumentation)
  if [[ $COMPONENT_ENABLE -eq 1 ]]; then
    [[ "$DO_COLD_CACHE" -eq 1 ]] && drop_os_caches "$ROOT" && restart_cluster "$ROOT"
    run_component_bench "$ROOT" "$PORT" "$NAME"
  fi

  # Tier 3: End-to-end bench (decoder throughput)
  if [[ $E2E_ENABLE -eq 1 ]]; then
    [[ "$DO_COLD_CACHE" -eq 1 ]] && drop_os_caches "$ROOT" && restart_cluster "$ROOT"
    run_e2e_bench "$ROOT" "$PORT" "$NAME"
  fi

  stop_cluster "$ROOT"
  log "[$NAME] done. Results under: $ROOT/results/"
}

# ------------------------------- build & run ---------------------------------
# Phase 1 - build every tree the selected mode needs. In "both" mode the
# baseline is built first, then the patched tree.
case "$RUN_MODE" in
  base_only)
    log "Building baseline (vanilla)"
    build_pg "$ROOT_BASE" ""
    ;;
  patched_only)
    log "Building target ($TAG_PATCHED)"
    build_pg "$ROOT_PATCHED" "$PATCH"
    ;;
  both)
    log "Building baseline (vanilla)"
    build_pg "$ROOT_BASE" ""
    log "Building target ($TAG_PATCHED)"
    build_pg "$ROOT_PATCHED" "$PATCH"
    ;;
esac

# From here on, stop the cluster(s) of this run whenever the script exits.
# The "'"$ROOT_BASE"'" splice closes the single-quoted trap text, inserts the
# root path as it is at this point, and reopens the quote; RUN_MODE stays
# unexpanded and is read when the trap fires.
trap '
  echo "Cleaning up..."
  [[ "$RUN_MODE" != "patched_only" ]] && stop_cluster "'"$ROOT_BASE"'"
  [[ "$RUN_MODE" != "base_only"    ]] && stop_cluster "'"$ROOT_PATCHED"'"
' EXIT

# Phase 2 - run the benchmark suite(s); in "both" mode the baseline runs first.
case "$RUN_MODE" in
  base_only)
    run_suite "$ROOT_BASE"   "base" ;;
  patched_only)
    run_suite "$ROOT_PATCHED" "patched" ;;
  both)
    run_suite "$ROOT_BASE"   "base"
    run_suite "$ROOT_PATCHED" "patched" ;;
esac

# -------------------------------- summaries ----------------------------------
# Print a side-by-side summary of a baseline and a patched run: selected
# perf counters from the component bench and mean decoder throughput from
# the end-to-end bench. A section is printed only when both runs produced
# the corresponding file.
#
# Arguments: $1 - baseline build root, $2 - patched build root
# Outputs:   the summary on stdout
compare_results () {
  local base_dir="$1" pat_dir="$2"
  local base_perf="$base_dir/profile/base_component.perf.stat"
  local pat_perf="$pat_dir/profile/patched_component.perf.stat"
  local base_csv="$base_dir/results/e2e/base_e2e.csv"
  local pat_csv="$pat_dir/results/e2e/patched_e2e.csv"

  echo
  echo "========================================="
  echo "BENCHMARK SUMMARY"
  echo "========================================="
  echo

  # Component bench comparison (perf stats)
  if [[ -f "$base_perf" && -f "$pat_perf" ]]; then
    echo "=== Component Bench: Perf Counters ==="
    echo
    echo "BASELINE:"
    grep -E 'cycles|instructions|cache-misses' "$base_perf" | head -7 || true
    echo
    echo "PATCHED:"
    grep -E 'cycles|instructions|cache-misses' "$pat_perf" | head -7 || true
    echo
  fi

  # E2E throughput comparison
  if [[ -f "$base_csv" && -f "$pat_csv" ]]; then
    echo "=== End-to-End: Decoder Throughput (changes/sec) ==="
    echo
    # Mean of column 4 over the data rows; empty when there are none
    local mean_prog='NR>1{sum+=$4; n++} END{if(n) printf "%.2f", sum/n}'
    local base_mean pat_mean improvement
    base_mean=$(awk -F, "$mean_prog" "$base_csv")
    pat_mean=$(awk -F, "$mean_prog" "$pat_csv")
    echo "BASELINE mean:  $base_mean changes/sec"
    echo "PATCHED mean:   $pat_mean changes/sec"

    if [[ -n "$base_mean" && -n "$pat_mean" ]]; then
      # Relative change in percent, computed in floating point. Truncating
      # the ratio to two decimals before multiplying by 100 would round
      # every result down to a whole percent. A zero baseline has no
      # meaningful relative change and is reported as such.
      if improvement=$(awk -v b="$base_mean" -v p="$pat_mean" \
          'BEGIN { if (b + 0 == 0) exit 1; printf "%.2f", (p - b) / b * 100 }'); then
        echo "Improvement:    ${improvement}%"
      else
        echo "Improvement:    n/a (baseline throughput is 0)"
      fi
    fi
    echo
  fi
}

# Results: a full comparison when both trees were measured, otherwise just
# the list of CSV files the single run produced
if [[ "$RUN_MODE" == "both" ]]; then
  compare_results "$ROOT_BASE" "$ROOT_PATCHED"
elif [[ "$RUN_MODE" == "base_only" ]]; then
  printf '\n%s\n' "=== Results (baseline only) ==="
  find "$ROOT_BASE/results" -name "*.csv" 2>/dev/null || true
elif [[ "$RUN_MODE" == "patched_only" ]]; then
  printf '\n%s\n' "=== Results (patched only) ==="
  find "$ROOT_PATCHED/results" -name "*.csv" 2>/dev/null || true
fi

# Closing guidance, part 1: where the output lives
cat <<'EOF'

=========================================
NEXT STEPS
=========================================

1. Examine detailed results:
EOF

case "$RUN_MODE" in
  base_only)
    printf '   - %s\n' "${ROOT_BASE}/results/" "${ROOT_BASE}/profile/"
    ;;
  patched_only)
    printf '   - %s\n' "${ROOT_PATCHED}/results/" "${ROOT_PATCHED}/profile/"
    ;;
  both)
    printf '   - %s\n' \
      "${ROOT_BASE}/results/ (baseline)" \
      "${ROOT_PATCHED}/results/ (patched)" \
      "${ROOT_BASE}/profile/ and ${ROOT_PATCHED}/profile/"
    ;;
esac

# Closing guidance, part 2: follow-up analysis and the controls checklist
cat <<'EOF'

2. Statistical analysis (compute 95% CI, effect sizes):
   Use R/Python to analyze CSV files

3. Plot results:
   - Time vs xcnt for different keep_ratios (unit bench)
   - Cycles, cache-misses comparison (component bench)
   - Throughput distribution (e2e bench)

4. Validate correctness:
   Ensure decoded output is identical between baseline and patched

=========================================
EXPERIMENTAL CONTROLS CHECKLIST
=========================================
☐ CPU governor set to 'performance'
☐ Process affinity pinned (taskset)
☐ NUMA locality fixed (numactl)
☐ Turbo boost disabled (for reduced jitter)
☐ Background services stopped (cron, updatedb, etc.)
☐ ≥30 repetitions collected
☐ Identical builds (same compiler, flags)
☐ Results include median, p95, and 95% CI
☐ Effect size computed (not just p-values)

EOF

