#!/usr/bin/env bash

set -euo pipefail

# Print the command-line help text on stdout, one argument per output line.
# Callers that report an error redirect this to stderr themselves.
usage() {
	printf '%s\n' \
		'Usage:' \
		'  pgstat_xact_baseline_bench.sh [options]' \
		'' \
		'Options:' \
		'  --cases "0 10 100 1000"  Pending-entry counts to test.' \
		'  --m 2000                 Number of tiny BEGIN/COMMIT transactions.' \
		'  --keep                   Keep the temporary cluster directory.' \
		'  --pg-bin DIR             Directory containing postgres/initdb/pg_ctl/psql/pg_config.' \
		'  --port PORT              Port for the temporary server.' \
		'' \
		'Environment:' \
		'  PG_BIN                   Same as --pg-bin.' \
		'  CC                       C compiler for the tiny libpq PQexec helper.' \
		'' \
		'The benchmark starts a temporary PostgreSQL cluster, creates the maximum' \
		'number of test tables requested, then sends each measured workload as one' \
		'simple-query message through libpq.  That is intentional: psql -f would split' \
		'the workload into multiple protocol messages and would not pressure the' \
		'pgStatPending-across-transaction-boundaries path in the same way.'
}

# Benchmark defaults; every one of them can be overridden on the command line.
cases="0 10 100 1000"
m=2000
keep=0
pg_bin="${PG_BIN:-}"
port=65432

# Print a command-line error on stderr and exit with status 2, the same
# status the script uses for an unknown option.
#   $1 - message to print
bad_usage()
{
	echo "$1" >&2
	exit 2
}

# Parse the command line into the globals cases, m, keep, pg_bin and port,
# then validate the numeric settings.
#   $@ - the script's command-line arguments
# Exits 0 after printing help for --help/-h, and 2 for an unknown option,
# an option that is missing its value, or a malformed --cases/--m/--port.
parse_args()
{
	while [ "$#" -gt 0 ]; do
		case "$1" in
			--cases|--m|--pg-bin|--port)
				# Without this check "$2" would trip "set -u" and the
				# user would only see a bare "unbound variable" error.
				if [ "$#" -lt 2 ]; then
					bad_usage "option $1 requires an argument"
				fi
				case "$1" in
					--cases) cases="$2" ;;
					--m) m="$2" ;;
					--pg-bin) pg_bin="$2" ;;
					--port) port="$2" ;;
				esac
				shift 2
				;;
			--keep)
				keep=1
				shift
				;;
			--help|-h)
				usage
				exit 0
				;;
			*)
				echo "unknown option: $1" >&2
				usage >&2
				exit 2
				;;
		esac
	done

	# $cases is later word-split and fed to sort, seq and "[ -eq ]", so
	# it may only contain digits separated by blanks, with at least one
	# number present.
	case "$cases" in
		*[!0-9[:blank:]]*)
			bad_usage "--cases must be a list of non-negative integers: $cases"
			;;
		*[0-9]*)
			;;
		*)
			bad_usage "--cases must name at least one pending-entry count"
			;;
	esac

	# $m drives an awk counting loop and is the divisor of the
	# per-transaction time: a non-number would make the loop compare
	# strings, and zero would divide by zero.
	case "$m" in
		''|*[!0-9]*)
			bad_usage "--m must be a positive integer: $m"
			;;
		*[1-9]*)
			;;
		*)
			bad_usage "--m must be a positive integer: $m"
			;;
	esac

	# $port is written verbatim into postgresql.conf and the conninfo.
	case "$port" in
		''|*[!0-9]*)
			bad_usage "--port must be an integer: $port"
			;;
	esac
}

# ${1+"$@"} rather than "$@": bash releases before 4.4 treat an empty "$@"
# as an unset variable under "set -u".
parse_args ${1+"$@"}

# A PostgreSQL bin directory is mandatory; it comes from --pg-bin or PG_BIN.
if [[ -z "$pg_bin" ]]; then
	echo "set PG_BIN or pass --pg-bin" >&2
	exit 1
fi

# Fail early, before any temporary state exists, if one of the required
# programs is missing from that directory or is not executable.
for prog in postgres initdb pg_ctl psql pg_config; do
	[[ -x "$pg_bin/$prog" ]] && continue
	echo "missing executable: $pg_bin/$prog" >&2
	exit 1
done

# Run a program from "$pg_bin" with the installation's own library
# directory placed first on the dynamic-loader search path.
#   $1   - program name inside $pg_bin (for example psql)
#   $2.. - arguments passed through unchanged
# The library directory is "$pg_bin/../lib" resolved to an absolute path;
# when it cannot be entered the program is run without touching the loader
# path.  Darwin uses DYLD_LIBRARY_PATH, every other system LD_LIBRARY_PATH.
# Returns the exit status of the program.
pg_tool()
{
	local tool="$1"
	local lib_path=""

	shift

	if ! lib_path="$(cd "$pg_bin/../lib" 2>/dev/null && pwd)"; then
		lib_path=""
	fi

	if [ -n "$lib_path" ]; then
		if [ "$(uname -s)" = "Darwin" ]; then
			DYLD_LIBRARY_PATH="$lib_path${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}" \
				"$pg_bin/$tool" "$@"
		else
			LD_LIBRARY_PATH="$lib_path${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" \
				"$pg_bin/$tool" "$@"
		fi
		return
	fi

	"$pg_bin/$tool" "$@"
}

# Largest requested pending-entry count; the setup phase creates that many
# tables.  $cases is left unquoted on purpose so that it splits into one
# count per line for sort.
max_n="$(printf '%s\n' $cases | sort -n | tail -n 1)"

# Private scratch directory that holds the cluster, the server log and the
# helper program (source and binary).
tmpdir="$(mktemp -d "${TMPDIR:-/tmp}/pgstat-xact-bench.XXXXXX")"
datadir="${tmpdir}/data"
logfile="${tmpdir}/postgres.log"
helper="${tmpdir}/pqexec_once"
helper_src="${helper}.c"

# Tear down the benchmark environment: ask the temporary server to stop if
# a data directory exists, then delete the scratch directory, or just
# report its location when --keep was given.  Installed as the exit and
# signal handler of the script.
cleanup()
{
	# Best effort from here on: a failing step must not abort the rest.
	set +e

	[ -d "$datadir" ] &&
		pg_tool pg_ctl -D "$datadir" -m fast -w stop >/dev/null 2>&1

	if ! [ "$keep" -eq 0 ]; then
		echo "kept temporary directory: $tmpdir"
	else
		rm -rf "$tmpdir"
	fi
}
# Clean up on every normal or error exit.
trap cleanup EXIT
# On SIGINT/SIGTERM: disarm the EXIT trap so cleanup runs exactly once, run
# it, then exit with the status listed after the colon for that signal.
for sig_spec in INT:130 TERM:143; do
	trap "trap - EXIT; cleanup; exit ${sig_spec#*:}" "${sig_spec%%:*}"
done

# Write the C source of the libpq helper to "$helper_src".
#
# The helper is invoked as "pqexec_once CONNINFO SQLFILE".  It reads the
# whole SQL file into memory, connects with PQconnectdb(), passes the text
# to a single PQexec() call, and requires the returned result to be exactly
# one row with one column, which it prints on stdout.
#
# Helper exit status: 0 on success; 1 when the query fails or the result is
# not 1x1; 2 for a wrong argument count, a file that cannot be read, or a
# failed connection.
#
# The here-document delimiter is quoted, so the C text below is written
# verbatim with no shell expansion.
cat >"$helper_src" <<'C'
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "libpq-fe.h"

static char *
read_file(const char *path, long *len_out)
{
	FILE *f = fopen(path, "rb");
	long len;
	char *buf;

	if (f == NULL)
	{
		fprintf(stderr, "could not open %s: %s\n", path, strerror(errno));
		exit(2);
	}
	if (fseek(f, 0, SEEK_END) != 0)
	{
		perror("fseek");
		exit(2);
	}
	len = ftell(f);
	if (len < 0)
	{
		perror("ftell");
		exit(2);
	}
	rewind(f);
	buf = malloc((size_t) len + 1);
	if (buf == NULL)
	{
		perror("malloc");
		exit(2);
	}
	if (fread(buf, 1, (size_t) len, f) != (size_t) len)
	{
		perror("fread");
		exit(2);
	}
	fclose(f);
	buf[len] = '\0';
	*len_out = len;
	return buf;
}

int
main(int argc, char **argv)
{
	const char *conninfo;
	const char *sql_path;
	PGconn *conn;
	PGresult *res;
	char *sql;
	long sql_len;

	if (argc != 3)
	{
		fprintf(stderr, "usage: %s CONNINFO SQLFILE\n", argv[0]);
		return 2;
	}

	conninfo = argv[1];
	sql_path = argv[2];
	sql = read_file(sql_path, &sql_len);

	conn = PQconnectdb(conninfo);
	if (PQstatus(conn) != CONNECTION_OK)
	{
		fprintf(stderr, "connection failed: %s\n", PQerrorMessage(conn));
		return 2;
	}

	res = PQexec(conn, sql);

	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		fprintf(stderr, "query failed: %s\n", PQerrorMessage(conn));
		PQclear(res);
		PQfinish(conn);
		free(sql);
		return 1;
	}
	if (PQntuples(res) != 1 || PQnfields(res) != 1)
	{
		fprintf(stderr, "query returned %d rows and %d columns, expected 1x1\n",
				PQntuples(res), PQnfields(res));
		PQclear(res);
		PQfinish(conn);
		free(sql);
		return 1;
	}

	printf("%s\n", PQgetvalue(res, 0, 0));

	PQclear(res);
	PQfinish(conn);
	free(sql);
	return 0;
}
C

# Compile the helper against the libpq headers and library that pg_config
# reports for the installation under test.  The library directory is also
# recorded as an rpath in the resulting binary.
includedir="$(pg_tool pg_config --includedir)"
libdir="$(pg_tool pg_config --libdir)"
compile_cmd=(
	"${CC:-cc}" -O2
	"-I$includedir"
	"-L$libdir" "-Wl,-rpath,$libdir"
	-o "$helper" "$helper_src"
	-lpq
)
"${compile_cmd[@]}"

# Create the throwaway cluster with trust authentication.
pg_tool initdb \
	-D "$datadir" \
	--auth trust \
	--no-sync \
	--no-instructions \
	--lc-messages=C \
	>/dev/null

# Append the benchmark settings to the generated configuration: an empty
# listen_addresses, the chosen port, a Unix socket inside the scratch
# directory, and fsync, autovacuum, function tracking and prepared
# transactions all switched off.
{
	printf '%s\n' "listen_addresses = ''"
	printf 'port = %s\n' "$port"
	printf "unix_socket_directories = '%s'\n" "$tmpdir"
	printf '%s\n' \
		'fsync = off' \
		'autovacuum = off' \
		"track_functions = 'none'" \
		'max_prepared_transactions = 0'
} >>"$datadir/postgresql.conf"

# Start the server and wait (-w) until pg_ctl reports it as started.
pg_tool pg_ctl -D "$datadir" -l "$logfile" -w start >/dev/null

# Connection string for the helper: the Unix socket in the scratch directory.
conninfo="host=$tmpdir port=$port dbname=postgres"

# Generate and run the one-time setup script: a fresh pgstat_xact_bench
# schema with tables t_000001 .. t_<max_n>, each holding a single row.
setup_sql="$tmpdir/setup.sql"
{
	echo "DROP SCHEMA IF EXISTS pgstat_xact_bench CASCADE;"
	echo "CREATE SCHEMA pgstat_xact_bench;"
	echo "SET search_path = pgstat_xact_bench;"
	# Count upwards in awk instead of using "seq 1 $max_n": BSD/macOS seq
	# counts DOWN when the last value is below the first, so max_n=0
	# would emit "1 0" and create two unintended tables.  "n + 0" forces
	# a numeric comparison.
	awk -v n="$max_n" 'BEGIN {
		for (i = 1; i <= n + 0; i++) {
			printf "CREATE TABLE t_%06d (a int);\n", i
			printf "INSERT INTO t_%06d VALUES (1);\n", i
		}
	}'
} >"$setup_sql"
pg_tool psql -X -q -v ON_ERROR_STOP=1 \
	-h "$tmpdir" -p "$port" -d postgres -f "$setup_sql" >/dev/null

# Run the compiled helper on one workload file with the libpq directory
# first on the dynamic-loader path (DYLD_LIBRARY_PATH on Darwin,
# LD_LIBRARY_PATH elsewhere).
#   $1 - path of the SQL file to send
# Prints whatever the helper prints and returns its exit status.
run_workload()
{
	if [ "$(uname -s)" = "Darwin" ]; then
		DYLD_LIBRARY_PATH="$libdir${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}" \
			"$helper" "$conninfo" "$1"
	else
		LD_LIBRARY_PATH="$libdir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" \
			"$helper" "$conninfo" "$1"
	fi
}

# Report header: run parameters, then the column titles.
printf 'pg_bin=%s\ntmpdir=%s\nm=%s\n' "$pg_bin" "$tmpdir" "$m"
printf "%10s %12s %12s %12s\n" "pending_n" "xacts_m" "seconds" "us_per_xact"

# One measurement per requested pending-entry count.
for n in $cases; do
	sql_file="$tmpdir/run_${n}.sql"
	{
		# Session setup, then one transaction that reads n tables
		# (or runs a trivial SELECT when n is 0).
		printf '%s\n' \
			"SET search_path = pgstat_xact_bench;" \
			"SET stats_fetch_consistency = none;" \
			"CREATE TEMP TABLE bench_timer (started_at timestamptz);" \
			"BEGIN;"
		if [ "$n" -eq 0 ]; then
			echo "SELECT 0;"
		else
			for i in $(seq 1 "$n"); do
				printf "SELECT count(*) FROM t_%06d;\n" "$i"
			done
		fi
		# Record the start time, run m empty transactions, then return
		# the elapsed seconds as the single result row.
		printf '%s\n' \
			"COMMIT;" \
			"INSERT INTO bench_timer VALUES (clock_timestamp());"
		awk -v m="$m" 'BEGIN { for (i = 1; i <= m; i++) print "BEGIN; COMMIT;" }'
		printf '%s\n' \
			"SELECT EXTRACT(epoch FROM clock_timestamp() - started_at)::float8" \
			"FROM bench_timer;"
	} >"$sql_file"

	seconds="$(run_workload "$sql_file")"
	us_per_xact="$(awk -v s="$seconds" -v m="$m" 'BEGIN { printf "%.3f", (s * 1000000.0) / m }')"
	printf "%10s %12s %12s %12s\n" "$n" "$m" "$seconds" "$us_per_xact"
done
