#!/bin/bash
#
# Benchmark: pg_dump tar format direct-write optimization
#
# Times pg_dump -Ft (tar format) and -Fp (plain format) writing to a file.
# Run it once against an unpatched build and once against a build with the
# pg_backup_tar.c patch applied, then compare the two sets of numbers.
#
# Usage:
#   1. Build and install PostgreSQL into a local prefix.
#   2. Run this script:  ./bench_tar_direct_write.sh /path/to/pg/install
#
# The script creates a temporary database, populates it with ~255MB of data,
# runs pg_dump in tar and plain formats 3 times each, and prints a summary.

# Abort on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails (not only the last one).
set -euo pipefail

# Installation prefix of the PostgreSQL build under test (required argument).
PGINSTALL="${1:?Usage: $0 /path/to/pg/install}"
export PATH="$PGINSTALL/bin:$PATH"
# Put the build's libraries first.  The inherited value is appended only when
# it is non-empty: a trailing ':' would leave a zero-length entry, which the
# dynamic linker interprets as the current working directory.
export LD_LIBRARY_PATH="$PGINSTALL/lib:$PGINSTALL/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Scratch locations and connection settings for the throwaway cluster.
PGDATA=$(mktemp -d /tmp/bench_tar_XXXXXX)
PGPORT=54399
DBNAME=bench_tar_test
DUMPFILE=$(mktemp /tmp/bench_dump_XXXXXX)

# Tear down everything the benchmark created: stop the server started from
# $PGDATA, then delete the data directory and the dump file.  Registered as
# the EXIT handler below.
cleanup() {
    local target
    # Best effort: stderr is discarded and a failing pg_ctl is ignored, so the
    # removal that follows still runs.
    pg_ctl -D "$PGDATA" stop -m immediate 2>/dev/null || :
    for target in "$PGDATA" "$DUMPFILE"; do
        rm -rf "$target"
    done
}
trap 'cleanup' EXIT

# Create a throwaway cluster in $PGDATA and start a server on $PGPORT.
echo "==> Initializing PostgreSQL instance in $PGDATA"
initdb -D "$PGDATA" --no-sync > /dev/null
# Settings appended to the postgresql.conf that initdb generated.
cat >> "$PGDATA/postgresql.conf" <<CONF
port = $PGPORT
shared_buffers = 256MB
work_mem = 64MB
maintenance_work_mem = 256MB
logging_collector = off
CONF
# The server log lives inside $PGDATA, which the EXIT handler removes, so on a
# failed start print it now; otherwise the reason for the failure is lost.
if ! pg_ctl -D "$PGDATA" -l "$PGDATA/server.log" -o "-p $PGPORT" start -w -s; then
    echo "error: PostgreSQL failed to start; server log follows" >&2
    cat "$PGDATA/server.log" >&2 || true
    exit 1
fi

# Create the benchmark database and fill it with generated rows.
# -X keeps the user's psqlrc from altering behavior or output.
# -v ON_ERROR_STOP=1 makes psql stop and exit non-zero as soon as a statement
# fails, so a broken data load aborts the script instead of going unnoticed.
echo "==> Creating test database ($DBNAME) with ~255MB of data"
psql -X -v ON_ERROR_STOP=1 -p "$PGPORT" -d postgres -q -c "CREATE DATABASE $DBNAME;"
psql -X -v ON_ERROR_STOP=1 -p "$PGPORT" -d "$DBNAME" -q <<'SQL'
CREATE TABLE large_data (
    id serial PRIMARY KEY,
    val1 int,
    val2 text,
    val3 float,
    payload text
);
INSERT INTO large_data (val1, val2, val3, payload)
  SELECT (random()*1000000)::int,
         md5(random()::text),
         random()*1000,
         repeat(md5(random()::text), 10)
  FROM generate_series(1, 500000);
CREATE INDEX large_data_val1_idx ON large_data (val1);
CREATE INDEX large_data_val2_idx ON large_data (val2);
ANALYZE large_data;
SQL
# Query the size in its own assignment: a failure inside an argument to echo
# would be masked by echo's own exit status.
table_size=$(psql -X -p "$PGPORT" -d "$DBNAME" -tAc \
    "SELECT pg_size_pretty(pg_total_relation_size('large_data'))")
echo "   $table_size total"

# Time pg_dump three times in one output format and print the fastest run.
#
# Globals:   PGPORT, DBNAME, DUMPFILE (read)
# Arguments: $1 - label printed in the summary line
#            $2 - format letter handed to pg_dump -F (the script uses t and p)
# Outputs:   one summary line on stdout; pg_dump's own messages on stderr
# Returns:   0 on success; 1 if a pg_dump run fails or no timing was captured
run_bench() {
    local label="$1" fmt="$2"
    local best="" secs i
    local num_re='^[0-9]+([.,][0-9]+)?$'
    # Make bash's `time` keyword print only the elapsed seconds, 3 decimals.
    local TIMEFORMAT='%3R'
    for i in 1 2 3; do
        # Inside the substitution fd 2 is captured to collect the timing line,
        # so pg_dump's stderr is routed to fd 3 (the caller's stderr) instead.
        # No pipeline is involved, so the substitution's status is pg_dump's
        # and a failed dump can no longer be reported as a fast run.
        if ! { secs=$( { time pg_dump -p "$PGPORT" -F"$fmt" -f "$DUMPFILE" "$DBNAME" 2>&3; } 2>&1 ); } 3>&2; then
            echo "error: pg_dump -F$fmt failed (run $i)" >&2
            rm -f "$DUMPFILE"
            return 1
        fi
        rm -f "$DUMPFILE"
        if [[ ! $secs =~ $num_re ]]; then
            echo "error: could not read a timing from: '$secs'" >&2
            return 1
        fi
        # Values are passed with -v rather than spliced into the awk program.
        # ',' is mapped to '.' for the comparison only, in case the locale
        # printed a decimal comma.
        if [ -z "$best" ] ||
           awk -v a="${secs/,/.}" -v b="${best/,/.}" 'BEGIN { exit !(a < b) }'; then
            best=$secs
        fi
    done
    printf "  %-25s %7.3fs  (best of 3)\n" "$label" "$best"
}

# Benchmark both output formats; each summary line is the fastest of 3 runs.
printf '\n'
printf '%s\n' "==> Running benchmarks (3 runs each, best-of-3 reported)"
printf '\n'
run_bench "Tar format  (-Ft)" t
run_bench "Plain format (-Fp)" p
printf '\n'
printf '%s\n' "Done. Compare these numbers before and after applying the patch."
