#!/usr/bin/env python3
"""
Four-Way Autovacuum Scheduling Comparison (v12 Patch + Proposed Fixes)

Compares four scheduling strategies across N runs with randomized OID
assignments to verify robustness:

  1. Before (OID order)         — pre-v19 catalog scan order
  2. v12 Patch (Score-based)    — unified score, highest first
  3. Dynamic Scaling            — David Rowley's weight-based early boost
  4. Proposed Fix (Tiered sort) — force_vacuum tables always first, then score

Scoring matches the v12 patch (src/backend/postmaster/autovacuum.c):
  - XID/MXID age scores: ONLY when force_vacuum (age > freeze_max_age)
  - Dead tuple / insert / analyze scores: ONLY when above threshold
  - Exponential boost: only past failsafe age (or earlier with dynamic scaling)
  - Final score = max(all applicable components)

The dynamic scaling approach (David Rowley's suggestion):
  - Allows weights up to 10.0
  - Activates exponential boost at effective_failsafe / max(weight, 1.0)
  - At weight=8.0, aggressive scaling starts at 200M instead of 1.6B

The tiered sort approach changes only the sort comparator:
  sort by (wraparound DESC, score DESC)  instead of  (score DESC)
This guarantees force_vacuum tables outrank routine maintenance.

Expected results:
  - OID:     High variance across runs (depends on OID luck)
  - Score:   Low variance but force_vacuum tables outranked by active tables
  - Dynamic: Better prioritization with higher weight
  - Tiered:  Low variance AND minimal force_vacuum exposure
"""

import copy
import math
import os
import random
import statistics
import time
from dataclasses import dataclass
from typing import Dict, List, Optional

# matplotlib/numpy are optional: the simulation and text report run without
# them; only the PNG chart generation is skipped when they are missing.
try:
    import matplotlib
    matplotlib.use('Agg')  # headless backend — we only write image files
    import matplotlib.pyplot as plt
    import numpy as np
    HAS_MATPLOTLIB = True
except ImportError:
    HAS_MATPLOTLIB = False
    print("Warning: matplotlib not available, visualization will be skipped")


# ============================================================================
# Configuration
# ============================================================================

@dataclass
class PgConfig:
    """PostgreSQL settings relevant to autovacuum scheduling.

    Defaults mirror the stock GUC values; the ``*_score_weight`` fields
    correspond to the weighting GUCs introduced by the v12 patch.
    """

    # Wraparound-protection ages (XID / MXID).
    autovacuum_freeze_max_age: int = 200_000_000
    vacuum_failsafe_age: int = 1_600_000_000
    autovacuum_multixact_freeze_max_age: int = 400_000_000
    vacuum_multixact_failsafe_age: int = 1_600_000_000
    # Dead-tuple / insert / analyze trigger thresholds.
    autovacuum_vacuum_threshold: int = 50
    autovacuum_vacuum_scale_factor: float = 0.2
    autovacuum_vacuum_insert_threshold: int = 1000
    autovacuum_vacuum_insert_scale_factor: float = 0.2
    autovacuum_analyze_threshold: int = 50
    autovacuum_analyze_scale_factor: float = 0.1
    # Per-component score weights (v12 patch GUCs).
    autovacuum_freeze_score_weight: float = 1.0
    autovacuum_multixact_freeze_score_weight: float = 1.0
    autovacuum_vacuum_score_weight: float = 1.0
    autovacuum_vacuum_insert_score_weight: float = 1.0
    autovacuum_analyze_score_weight: float = 1.0

    @property
    def effective_xid_failsafe(self) -> int:
        """XID age at which the exponential score boost engages.

        Clamped to at least 105% of freeze_max_age so the boost can never
        fire before a table is already force_vacuum.
        """
        return max(self.vacuum_failsafe_age,
                   int(self.autovacuum_freeze_max_age * 1.05))

    @property
    def effective_mxid_failsafe(self) -> int:
        """MXID counterpart of :attr:`effective_xid_failsafe`."""
        return max(self.vacuum_multixact_failsafe_age,
                   int(self.autovacuum_multixact_freeze_max_age * 1.05))


@dataclass
class SimConfig:
    """Knobs for the discrete-time simulation itself (not PostgreSQL)."""

    num_workers: int = 3             # concurrent autovacuum workers
    duration_sec: int = 604_800      # 7 days
    time_step_sec: int = 60          # 1-minute granularity
    vacuum_base_sec: int = 120       # fixed cost per vacuum job
    vacuum_per_mtuple_sec: int = 30  # extra cost per million tuples
    num_runs: int = 20               # repetitions with reshuffled OIDs
    master_seed: int = 42            # seeds the workload template generator


# ============================================================================
# Table & Job
# ============================================================================

@dataclass
class Table:
    """Mutable per-table state advanced each simulation step."""

    oid: int                 # catalog OID; the sort key in 'oid' mode
    name: str
    n_tuples: int
    relfrozenxid_age: int    # XID age; crossing freeze_max_age forces vacuum
    relminmxid_age: int      # MXID age; same role for multixacts
    n_dead_tup: int = 0
    n_ins_since_vacuum: int = 0
    n_mod_since_analyze: int = 0
    xid_rate: int = 1000     # XID age growth per simulated second
    dead_rate: int = 0       # dead tuples generated per simulated minute
    insert_rate: int = 0     # inserts per simulated minute
    is_being_vacuumed: bool = False
    # Exposure bookkeeping (simulation seconds, each set at most once):
    became_force_at: Optional[int] = None          # first crossed freeze_max_age
    vacuumed_after_force_at: Optional[int] = None  # first vacuum finish after that


@dataclass
class Job:
    """An in-flight worker job; completes when sim time reaches end_time."""

    table: Table
    end_time: int       # simulation second at which the job finishes
    do_vacuum: bool
    do_analyze: bool

# ============================================================================
# Scoring — matches v12 patch (autovacuum.c lines 3161-3290)
# ============================================================================

def relation_needs_vacanalyze(t: Table, c: PgConfig):
    """
    Decide whether table *t* needs vacuum/analyze under the v12 patch rules.

    Returns a ``(dovacuum, doanalyze, force_vacuum, score)`` tuple.  The
    XID/MXID components contribute only when the table is force_vacuum;
    the dead-tuple, insert, and analyze components only when past their
    respective thresholds.  The final score is the maximum of every
    applicable component.
    """
    force_vacuum = (
        t.relfrozenxid_age > c.autovacuum_freeze_max_age
        or t.relminmxid_age > c.autovacuum_multixact_freeze_max_age
    )

    dovacuum = force_vacuum
    doanalyze = False
    score = 0.0

    # XID/MXID components: contribute only when force_vacuum.
    if force_vacuum:
        xid = t.relfrozenxid_age / c.autovacuum_freeze_max_age
        mxid = t.relminmxid_age / c.autovacuum_multixact_freeze_max_age

        # Past the failsafe age the score grows exponentially with age.
        if t.relfrozenxid_age >= c.effective_xid_failsafe:
            xid = math.pow(xid, max(1.0, t.relfrozenxid_age / 100_000_000))
        if t.relminmxid_age >= c.effective_mxid_failsafe:
            mxid = math.pow(mxid, max(1.0, t.relminmxid_age / 100_000_000))

        score = max(xid * c.autovacuum_freeze_score_weight,
                    mxid * c.autovacuum_multixact_freeze_score_weight)

    # Dead-tuple component: only when past the vacuum threshold.
    vac_limit = (c.autovacuum_vacuum_threshold
                 + c.autovacuum_vacuum_scale_factor * t.n_tuples)
    if t.n_dead_tup > vac_limit:
        dovacuum = True
        score = max(score,
                    (t.n_dead_tup / max(vac_limit, 1))
                    * c.autovacuum_vacuum_score_weight)

    # Insert component: only when past the insert threshold.
    ins_limit = (c.autovacuum_vacuum_insert_threshold
                 + c.autovacuum_vacuum_insert_scale_factor * t.n_tuples)
    if t.n_ins_since_vacuum > ins_limit:
        dovacuum = True
        score = max(score,
                    (t.n_ins_since_vacuum / max(ins_limit, 1))
                    * c.autovacuum_vacuum_insert_score_weight)

    # Analyze component: only when past the analyze threshold.
    anl_limit = (c.autovacuum_analyze_threshold
                 + c.autovacuum_analyze_scale_factor * t.n_tuples)
    if t.n_mod_since_analyze > anl_limit:
        doanalyze = True
        score = max(score,
                    (t.n_mod_since_analyze / max(anl_limit, 1))
                    * c.autovacuum_analyze_score_weight)

    return dovacuum, doanalyze, force_vacuum, score


def relation_needs_vacanalyze_dynamic(t: Table, c: PgConfig, weight: float = 8.0):
    """
    Variant of relation_needs_vacanalyze implementing David Rowley's
    dynamic-scaling suggestion.

    Differences from the v12 scoring:
      * the exponential boost activates at effective_failsafe /
        max(weight, 1.0) instead of at the failsafe age itself, and
      * *weight* replaces the configured freeze score weights.

    Returns a ``(dovacuum, doanalyze, force_vacuum, score)`` tuple.
    """
    force_vacuum = (
        t.relfrozenxid_age > c.autovacuum_freeze_max_age
        or t.relminmxid_age > c.autovacuum_multixact_freeze_max_age
    )

    dovacuum = force_vacuum
    doanalyze = False
    score = 0.0

    # XID/MXID components: contribute only when force_vacuum.
    if force_vacuum:
        xid = t.relfrozenxid_age / c.autovacuum_freeze_max_age
        mxid = t.relminmxid_age / c.autovacuum_multixact_freeze_max_age

        # The boost kicks in up to weight-times earlier than stock:
        # e.g. weight=8.0 starts aggressive scaling at 200M, not 1.6B.
        divisor = max(weight, 1.0)
        if t.relfrozenxid_age >= c.effective_xid_failsafe / divisor:
            xid = math.pow(xid, max(1.0, t.relfrozenxid_age / 100_000_000))
        if t.relminmxid_age >= c.effective_mxid_failsafe / divisor:
            mxid = math.pow(mxid, max(1.0, t.relminmxid_age / 100_000_000))

        score = max(xid * weight, mxid * weight)

    # Dead-tuple component: only when past the vacuum threshold.
    vac_limit = (c.autovacuum_vacuum_threshold
                 + c.autovacuum_vacuum_scale_factor * t.n_tuples)
    if t.n_dead_tup > vac_limit:
        dovacuum = True
        score = max(score,
                    (t.n_dead_tup / max(vac_limit, 1))
                    * c.autovacuum_vacuum_score_weight)

    # Insert component: only when past the insert threshold.
    ins_limit = (c.autovacuum_vacuum_insert_threshold
                 + c.autovacuum_vacuum_insert_scale_factor * t.n_tuples)
    if t.n_ins_since_vacuum > ins_limit:
        dovacuum = True
        score = max(score,
                    (t.n_ins_since_vacuum / max(ins_limit, 1))
                    * c.autovacuum_vacuum_insert_score_weight)

    # Analyze component: only when past the analyze threshold.
    anl_limit = (c.autovacuum_analyze_threshold
                 + c.autovacuum_analyze_scale_factor * t.n_tuples)
    if t.n_mod_since_analyze > anl_limit:
        doanalyze = True
        score = max(score,
                    (t.n_mod_since_analyze / max(anl_limit, 1))
                    * c.autovacuum_analyze_score_weight)

    return dovacuum, doanalyze, force_vacuum, score


# ============================================================================
# Simulation Engine
# ============================================================================

def run_simulation(
    tables: List[Table], cfg: PgConfig, scfg: SimConfig,
    mode: str, rng: random.Random
) -> Dict:
    """
    Discrete-time autovacuum simulation; mutates *tables* in place.

    Each step: finish due jobs, advance per-table workloads, record the
    count of unvacuumed force_vacuum tables, then hand any free workers
    the top-ranked candidates under the selected strategy.

    mode: 'oid'     — Before: sort candidates by OID
          'score'   — v12 Patch: sort candidates by score DESC
          'dynamic' — David Rowley: weight=8.0 with early exponential boost
          'tiered'  — Proposed: sort by (force_vacuum DESC, score DESC)

    Returns {'force_counts': per-step force_vacuum counts,
             'tables': the (mutated) input table list}.
    """
    jobs: List[Job] = []
    force_counts: List[int] = []
    dt = scfg.time_step_sec
    total_steps = scfg.duration_sec // dt

    for step in range(total_steps):
        now = step * dt

        # --- Complete finished jobs ---
        finished = [j for j in jobs if j.end_time <= now]
        for j in finished:
            t = j.table
            if j.do_vacuum:
                # Vacuum clears the dead/insert counters and resets the
                # ages to a small random residual.
                t.n_dead_tup = 0
                t.n_ins_since_vacuum = 0
                t.relfrozenxid_age = rng.randint(100, 5000)
                t.relminmxid_age = rng.randint(50, 2500)
            if j.do_analyze:
                t.n_mod_since_analyze = 0
            t.is_being_vacuumed = False
            # Record the first vacuum completion after the table crossed
            # freeze_max_age (consumed by compute_exposures).
            if (j.do_vacuum
                    and t.became_force_at is not None
                    and t.vacuumed_after_force_at is None):
                t.vacuumed_after_force_at = now
        jobs = [j for j in jobs if j.end_time > now]

        # --- Advance workloads ---
        # Tables currently being vacuumed are frozen in place; xid_rate is
        # per second, dead/insert rates are per minute.
        for t in tables:
            if t.is_being_vacuumed:
                continue
            t.relfrozenxid_age += t.xid_rate * dt
            t.relminmxid_age += (t.xid_rate // 2) * dt
            t.n_dead_tup += int(t.dead_rate * dt / 60)
            t.n_ins_since_vacuum += int(t.insert_rate * dt / 60)
            t.n_mod_since_analyze += int(
                (t.dead_rate + t.insert_rate) * dt / 60)
            if t.became_force_at is None:
                if (t.relfrozenxid_age > cfg.autovacuum_freeze_max_age
                        or t.relminmxid_age
                        > cfg.autovacuum_multixact_freeze_max_age):
                    t.became_force_at = now

        # --- Record unvacuumed force_vacuum count ---
        fc = sum(1 for t in tables
                 if not t.is_being_vacuumed
                 and (t.relfrozenxid_age > cfg.autovacuum_freeze_max_age
                      or t.relminmxid_age
                      > cfg.autovacuum_multixact_freeze_max_age))
        force_counts.append(fc)

        # --- Schedule available workers ---
        avail = scfg.num_workers - len(jobs)
        if avail > 0:
            candidates = []
            for t in tables:
                if t.is_being_vacuumed:
                    continue
                # Use dynamic scoring for 'dynamic' mode, regular for others
                if mode == 'dynamic':
                    dv, da, fv, sc = relation_needs_vacanalyze_dynamic(t, cfg, weight=8.0)
                else:
                    dv, da, fv, sc = relation_needs_vacanalyze(t, cfg)
                if dv or da:
                    candidates.append((t, sc, dv, da, fv))

            # KEY DIFFERENCE between the four modes:
            if mode == 'oid':
                candidates.sort(key=lambda x: x[0].oid)
            elif mode == 'score':
                candidates.sort(key=lambda x: x[1], reverse=True)
            elif mode == 'dynamic':
                # Same as score but with dynamic scaling
                candidates.sort(key=lambda x: x[1], reverse=True)
            elif mode == 'tiered':
                # Wraparound first (True=1 > False=0), then score DESC
                candidates.sort(
                    key=lambda x: (int(x[4]), x[1]), reverse=True)

            # Dispatch the top candidates; job duration scales with size.
            for t, sc, dv, da, fv in candidates[:avail]:
                dur = (scfg.vacuum_base_sec +
                       int(t.n_tuples / 1_000_000)
                       * scfg.vacuum_per_mtuple_sec)
                t.is_being_vacuumed = True
                jobs.append(Job(table=t, end_time=now + dur,
                                do_vacuum=dv, do_analyze=da))

    return {'force_counts': force_counts, 'tables': tables}


# ============================================================================
# Workload Generation
# ============================================================================

# Names of the tables whose wraparound exposure is tracked per run:
# the 5 pre-aged "critical" tables plus the 15 staggered "aging" tables.
RISK_NAMES = [
    f'{kind}_{i}'
    for kind, count in (('critical', 5), ('aging', 15))
    for i in range(count)
]


def generate_templates(master_rng: random.Random) -> List[Dict]:
    """
    Build the fixed per-table properties for one experiment.

    No OIDs here — those are shuffled per run by instantiate_tables.

    Workload mix (100 tables total):
      * 5  'critical': already past freeze_max_age (220M-800M),
      * 15 'aging':    cross freeze_max_age at staggered times in-sim,
      * 80 'active':   heavy dead-tuple churn that keeps workers busy.
    """
    freeze_max = 200_000_000
    rand = master_rng.randint
    templates: List[Dict] = []

    critical_ages = [220_000_000, 350_000_000, 500_000_000,
                     650_000_000, 800_000_000]
    for i, age in enumerate(critical_ages):
        templates.append({
            'name': f'critical_{i}',
            'n_tuples': rand(2_000_000, 5_000_000),
            'relfrozenxid_age': age,
            'relminmxid_age': age // 3,
            'xid_rate': rand(800, 2000),
            'dead_rate': rand(1, 5),
            'insert_rate': rand(1, 3),
        })

    for i in range(15):
        # Stagger crossings between ~day 0.3 and ~day 5.5 of the sim.
        target_sec = int((0.3 + (i / 14) * 5.2) * 86400)
        rate = rand(800, 2500)
        start_age = max(50_000_000, freeze_max - rate * target_sec)
        templates.append({
            'name': f'aging_{i}',
            'n_tuples': rand(2_000_000, 5_000_000),
            'relfrozenxid_age': start_age,
            'relminmxid_age': start_age // 3,
            'xid_rate': rate,
            'dead_rate': rand(1, 5),
            'insert_rate': rand(1, 3),
        })

    for i in range(80):
        n_tup = rand(100_000, 2_000_000)
        # Seed dead tuples anywhere from 30% to 250% of the vacuum threshold.
        vacthresh = 50 + 0.2 * n_tup
        templates.append({
            'name': f'active_{i}',
            'n_tuples': n_tup,
            'relfrozenxid_age': rand(10_000_000, 80_000_000),
            'relminmxid_age': rand(5_000_000, 40_000_000),
            'n_dead_tup': rand(int(vacthresh * 0.3), int(vacthresh * 2.5)),
            'xid_rate': rand(500, 1500),
            'dead_rate': rand(3000, 12000),
            'insert_rate': rand(1000, 5000),
        })

    return templates


def instantiate_tables(
    templates: List[Dict], oid_rng: random.Random
) -> List[Table]:
    """Materialize Table objects from templates with freshly shuffled OIDs.

    The per-run OID shuffle is what makes the 'oid' scheduling mode vary
    between runs while every other table property stays fixed.
    """
    oids = list(range(16384, 16384 + len(templates)))
    oid_rng.shuffle(oids)

    stat_keys = ('n_dead_tup', 'n_ins_since_vacuum', 'n_mod_since_analyze')
    result: List[Table] = []
    for tmpl, oid in zip(templates, oids):
        # Stat counters default to 0 when the template omits them.
        kwargs = {k: v for k, v in tmpl.items() if k not in stat_keys}
        for key in stat_keys:
            kwargs[key] = tmpl.get(key, 0)
        result.append(Table(oid=oid, **kwargs))
    return result


def compute_exposures(tables: List[Table], duration: int) -> Dict[str, float]:
    """Return {table_name: minutes spent past freeze_max_age} for at-risk tables.

    A table that was never vacuumed after crossing the threshold is
    charged exposure through the end of the simulation.
    """
    by_name = {t.name: t for t in tables}
    exposures: Dict[str, float] = {}
    for name in RISK_NAMES:
        t = by_name.get(name)
        if t is None or t.became_force_at is None:
            continue
        end = t.vacuumed_after_force_at
        if end is None:
            end = duration
        exposures[name] = (end - t.became_force_at) / 60.0
    return exposures


def safe_stdev(vals):
    """Sample standard deviation, or 0.0 when fewer than two values."""
    return statistics.stdev(vals) if len(vals) >= 2 else 0.0


# ============================================================================
# Visualization
# ============================================================================

# The four scheduling strategies under comparison, in display order.
MODES = ['oid', 'score', 'dynamic', 'tiered']
# Long labels for plot legends.
MODE_LABELS = {
    'oid': 'Before (OID Order)',
    'score': 'v12 Patch (Score)',
    'dynamic': 'Dynamic (weight=8.0)',
    'tiered': 'Proposed (Tiered)',
}
# Short labels for the text report's column headers.
MODE_SHORT = {'oid': 'OID', 'score': 'Score', 'dynamic': 'Dynamic', 'tiered': 'Tiered'}
# Fixed per-mode colors, kept consistent across all panels.
COLORS = {'oid': '#d62728', 'score': '#1f77b4', 'dynamic': '#ff7f0e', 'tiered': '#2ca02c'}


def create_visualization(data, scfg, cfg):
    """Render the three-panel wraparound-risk chart to output/.

    Panel 1: all four strategies; Panel 2: the three score-based
    strategies; Panel 3: the two proposed fixes head-to-head.  Each panel
    plots the mean force_vacuum timeline across runs with a ±1 std band.

    No-op when matplotlib is unavailable.  *cfg* is currently unused but
    kept for interface symmetry with the rest of the reporting code.
    """
    if not HAS_MATPLOTLIB:
        return
    os.makedirs('output', exist_ok=True)

    n_runs = scfg.num_runs
    total_steps = scfg.duration_sec // scfg.time_step_sec
    time_days = np.arange(total_steps) * scfg.time_step_sec / 86400.0

    def _plot_panel(ax, modes, title, linewidth=2.5, band_alpha=0.12):
        """Plot mean ± std timelines for *modes* on *ax* and style the axis."""
        for mode in modes:
            tl = data['force_timelines'][mode]
            # Runs could in principle differ in length; truncate to shortest.
            min_len = min(len(t) for t in tl) if tl else 0
            if min_len == 0:
                continue
            arr = np.array([t[:min_len] for t in tl])
            td = time_days[:min_len]
            mean_line = arr.mean(axis=0)
            std_line = arr.std(axis=0)
            ax.plot(td, mean_line, color=COLORS[mode], linewidth=linewidth,
                    label=MODE_LABELS[mode])
            # Clamp the lower band edge at 0: a table count can't go negative.
            ax.fill_between(td,
                            np.maximum(mean_line - std_line, 0),
                            mean_line + std_line,
                            color=COLORS[mode], alpha=band_alpha)

        ax.set_xlabel('Time (days)', fontsize=12)
        ax.set_ylabel('Unvacuumed Tables Past freeze_max_age', fontsize=12)
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.legend(fontsize=11, loc='upper right')
        ax.set_xlim(0, scfg.duration_sec / 86400)
        ax.set_ylim(bottom=-0.5)
        ax.grid(True, alpha=0.3)

    fig = plt.figure(figsize=(20, 15))
    gs = fig.add_gridspec(3, 1, hspace=0.3)

    _plot_panel(
        fig.add_subplot(gs[0]), MODES,
        'Panel 1: All Four Scheduling Strategies\n'
        f'(mean ± std, {n_runs} runs with randomized OIDs)')
    _plot_panel(
        fig.add_subplot(gs[1]), ['score', 'dynamic', 'tiered'],
        'Panel 2: Comparing Proposed Fixes to v12 Baseline\n'
        '(v12 Score vs Dynamic Scaling vs Tiered Sort)')
    _plot_panel(
        fig.add_subplot(gs[2]), ['dynamic', 'tiered'],
        'Panel 3: Direct Comparison of Proposed Fixes\n'
        '(Dynamic Scaling weight=8.0 vs Tiered Sort)',
        linewidth=3.0, band_alpha=0.15)

    fig.suptitle(
        'PostgreSQL 19: Autovacuum Scheduling — Wraparound Risk Over Time\n'
        'Comparing v12 Patch, Dynamic Scaling (weight=8.0), and Tiered Sort',
        fontsize=16, fontweight='bold', y=0.995)

    plt.savefig('output/wraparound_risk_comparison.png',
                dpi=150, bbox_inches='tight')
    plt.close()
    print("  ✓ output/wraparound_risk_comparison.png")


# ============================================================================
# Main
# ============================================================================

def main():
    """Run the four-way comparison across all runs and print the report."""
    wall_start = time.time()

    print("=" * 80)
    print("FOUR-WAY AUTOVACUUM SCHEDULING COMPARISON")
    print("OID vs v12 Score vs Dynamic Scaling (weight=8.0) vs Tiered Sort")
    print("=" * 80)

    cfg = PgConfig()
    scfg = SimConfig()

    print(f"\nConfig: {scfg.num_workers} workers, "
          f"{scfg.duration_sec // 86400}-day sim, "
          f"{scfg.time_step_sec}s steps, "
          f"{scfg.num_runs} runs")
    print("Tables: 5 critical + 15 aging + 80 active = 100")
    print(f"freeze_max_age = {cfg.autovacuum_freeze_max_age:,}")
    print(f"Estimated runtime: 3-8 minutes\n")

    # Templates are generated once so every run compares the same workload;
    # only the per-run OID assignment (and sim RNG) differs.
    master_rng = random.Random(scfg.master_seed)
    templates = generate_templates(master_rng)

    # Accumulators
    data: Dict = {
        'force_timelines': {m: [] for m in MODES},
        'avg_exposures': {m: [] for m in MODES},
        'per_table': {m: {n: [] for n in RISK_NAMES} for m in MODES},
        'peaks': {m: [] for m in MODES},
    }

    hdr = f"{'Run':>4}"
    for m in MODES:
        hdr += f"  {MODE_SHORT[m] + ' avg':>11}"
    print(hdr)
    print("-" * (4 + 13 * len(MODES)))

    for run_idx in range(scfg.num_runs):
        # Deterministic per-run seed so runs are reproducible.
        run_seed = 1000 + run_idx
        base_tables = instantiate_tables(templates, random.Random(run_seed))

        line = f"  {run_idx + 1:>2}"
        for mi, mode in enumerate(MODES):
            # Deep-copy so each mode starts from identical table state;
            # each mode also gets its own independent sim RNG.
            tables = copy.deepcopy(base_tables)
            rng = random.Random(run_seed + 5000 + mi * 1000)

            res = run_simulation(tables, cfg, scfg, mode, rng)

            fc = res['force_counts']
            data['force_timelines'][mode].append(fc)
            data['peaks'][mode].append(max(fc) if fc else 0)

            exp = compute_exposures(res['tables'], scfg.duration_sec)
            vals = list(exp.values())
            avg = statistics.mean(vals) if vals else 0.0
            data['avg_exposures'][mode].append(avg)

            for name in RISK_NAMES:
                if name in exp:
                    data['per_table'][mode][name].append(exp[name])

            line += f"  {avg:>9.0f}m"

        print(line)

    # ================================================================
    # Aggregate Results
    # ================================================================
    print("\n" + "=" * 72)
    print("AGGREGATE RESULTS")
    print("=" * 72)

    def fmts(vals):
        # Format "mean ± std (min, max)" for a list of per-run values.
        if not vals:
            return "n/a"
        m = statistics.mean(vals)
        s = safe_stdev(vals)
        return (f"{m:>7.0f} ± {s:<7.0f}  "
                f"(min={min(vals):.0f}, max={max(vals):.0f})")

    print(f"\nAvg exposure per run (minutes):")
    for mode in MODES:
        print(f"  {MODE_SHORT[mode]:<10}: "
              f"{fmts(data['avg_exposures'][mode])}")

    print(f"\nPeak concurrent force_vacuum tables:")
    for mode in MODES:
        print(f"  {MODE_SHORT[mode]:<10}: "
              f"{fmts([float(x) for x in data['peaks'][mode]])}")

    # Per-run head-to-head comparison for every ordered mode pair.
    print(f"\nPairwise wins (lower avg exposure = better):")
    n = scfg.num_runs
    pairs = [('oid', 'score'), ('oid', 'dynamic'), ('oid', 'tiered'),
             ('score', 'dynamic'), ('score', 'tiered'), ('dynamic', 'tiered')]
    for m1, m2 in pairs:
        w = sum(1 for a, b in zip(
            data['avg_exposures'][m1], data['avg_exposures'][m2]) if b < a)
        l = sum(1 for a, b in zip(
            data['avg_exposures'][m1], data['avg_exposures'][m2]) if b > a)
        t = n - w - l
        print(f"  {MODE_SHORT[m2]} beats {MODE_SHORT[m1]}: "
              f"{w}/{n}   loses: {l}/{n}   ties: {t}/{n}")

    print(f"\nVariance (std dev of avg exposure across runs):")
    for mode in MODES:
        s = safe_stdev(data['avg_exposures'][mode])
        print(f"  {MODE_SHORT[mode]:<10}: {s:.0f} min")

    print(f"\nPer-table mean exposure (minutes):")
    hdr2 = f"  {'Table':<14}"
    for m in MODES:
        hdr2 += f"  {MODE_SHORT[m]:>14}"
    print(hdr2)
    print("  " + "-" * (14 + 16 * len(MODES)))

    for name in RISK_NAMES:
        line = f"  {name:<14}"
        for mode in MODES:
            vals = data['per_table'][mode].get(name, [])
            if vals:
                m = statistics.mean(vals)
                s = safe_stdev(vals)
                line += f"  {m:>6.0f}±{s:<5.0f}"
            else:
                line += f"  {'n/a':>14}"
        print(line)

    elapsed = time.time() - wall_start
    print(f"\n{'=' * 72}")
    print(f"Completed in {elapsed:.0f} seconds ({elapsed / 60:.1f} minutes)")
    print(f"{'=' * 72}")

    # ================================================================
    # Visualization
    # ================================================================
    if HAS_MATPLOTLIB:
        print("\nGenerating visualization...")
        create_visualization(data, scfg, cfg)
    else:
        print("\nSkipping visualization (matplotlib not installed).")

    print("\nDone.")


# Script entry point.
if __name__ == '__main__':
    main()
