diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 42d0667..ade1b53 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -1138,6 +1138,9 @@ number of transactions actually processed: 10000/10000
 tps = 618.764555 (including connections establishing)
 tps = 622.977698 (excluding connections establishing)
 SQL script 1: &lt;builtin: TPC-B (sort of)&gt;
+ - 10000 transactions (100.0% of total, tps = 618.764555)
+ - latency average = 15.844 ms
+ - latency stddev = 2.715 ms
  - statement latencies in milliseconds:
         0.004386        \set nbranches 1 * :scale
         0.001343        \set ntellers 10 * :scale
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 305c319..5594d1c 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -164,6 +164,7 @@ bool		use_log;			/* log transaction latencies to a file */
 bool		use_quiet;			/* quiet logging onto stderr */
 int			agg_interval;		/* log aggregates instead of individual
 								 * transactions */
+bool        per_script_stats = false; /* whether to collect stats per script */
 int			progress = 0;		/* thread progress report every this seconds */
 bool		progress_timestamp = false; /* progress report with Unix time */
 int			nclients = 1;		/* number of clients */
@@ -299,6 +300,7 @@ static struct
 {
 	const char *name;
 	Command   **commands;
+	StatsData stats;
 }	sql_script[MAX_SCRIPTS];	/* SQL script files */
 static int	num_scripts;		/* number of scripts in sql_script[] */
 static int	num_commands = 0;	/* total number of Command structs */
@@ -1308,7 +1310,7 @@ top:
 		/* transaction finished: calculate latency and log the transaction */
 		if (commands[st->state + 1] == NULL)
 		{
-			if (progress || throttle_delay || latency_limit || logfile)
+			if (progress || throttle_delay || latency_limit || per_script_stats || logfile)
 				doTxStats(thread, st, &now, false, logfile, agg);
 			else
 				thread->stats.cnt++;
@@ -1401,7 +1403,7 @@ top:
 	}
 
 	/* Record transaction start time under logging, progress or throttling */
-	if ((logfile || progress || throttle_delay || latency_limit) && st->state == 0)
+	if ((logfile || progress || throttle_delay || latency_limit || per_script_stats) && st->state == 0)
 	{
 		INSTR_TIME_SET_CURRENT(st->txn_begin);
 
@@ -1889,6 +1891,9 @@ doTxStats(TState *thread, CState *st, instr_time *now,
 
 	if (use_log)
 		doLog(thread, st, logfile, now, agg, skipped, latency, lag);
+
+	if (per_script_stats) /* mutex? hmmm... these are only statistics */
+		doStats(& sql_script[st->use_file].stats, skipped, latency, lag);
 }
 
 
@@ -2678,6 +2683,7 @@ addScript(const char *name, Command **commands)
 
 	sql_script[num_scripts].name = name;
 	sql_script[num_scripts].commands = commands;
+	initStats(& sql_script[num_scripts].stats, 0.0);
 	num_scripts++;
 }
 
@@ -2761,22 +2767,40 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
 	printf("tps = %f (including connections establishing)\n", tps_include);
 	printf("tps = %f (excluding connections establishing)\n", tps_exclude);
 
-	/* Report per-command latencies */
-	if (is_latencies)
+	/* Report per-script stats */
+	if (per_script_stats)
 	{
 		int			i;
 
 		for (i = 0; i < num_scripts; i++)
 		{
-			Command   **commands;
+			printf("SQL script %d: %s\n"
+				   " - "INT64_FORMAT" transactions (%.1f%% of total, tps = %f)\n",
+				   i+1, sql_script[i].name,
+				   sql_script[i].stats.cnt,
+				   100.0 * sql_script[i].stats.cnt / total->cnt,
+				   sql_script[i].stats.cnt / time_include);
 
-			printf("SQL script %d: %s\n", i + 1, sql_script[i].name);
-			printf(" - statement latencies in milliseconds:\n");
+			if (latency_limit)
+				printf(" - number of transactions skipped: "INT64_FORMAT" (%.3f%%)\n",
+					   sql_script[i].stats.skipped,
+					   100.0 * sql_script[i].stats.skipped /
+					   (sql_script[i].stats.skipped + sql_script[i].stats.cnt));
 
-			for (commands = sql_script[i].commands; *commands != NULL; commands++)
-				printf("   %11.3f  %s\n",
-				   1000.0 * (*commands)->stats.sum / (*commands)->stats.count,
-					   (*commands)->line);
+			printSimpleStats(" - latency", & sql_script[i].stats.latency);
+
+			/* Report per-command latencies */
+			if (is_latencies)
+			{
+				Command ** com;
+
+				printf(" - per command latencies in ms:\n");
+
+				for (com = sql_script[i].commands; *com != NULL; com++)
+					printf("   %11.3f  %s\n",
+						   1000.0 * (*com)->stats.sum / (*com)->stats.count,
+						   (*com)->line);
+			}
 		}
 	}
 }
@@ -2962,6 +2986,7 @@ main(int argc, char **argv)
 				break;
 			case 'r':
 				benchmarking_option_set = true;
+				per_script_stats = true;
 				is_latencies = true;
 				break;
 			case 's':
@@ -3185,6 +3210,10 @@ main(int argc, char **argv)
 		internal_script_used = true;
 	}
 
+	/* show per script stats if several scripts are used */
+	if (num_scripts > 1)
+		per_script_stats = true;
+
 	/*
 	 * Don't need more threads than there are clients.  (This is not merely an
 	 * optimization; throttle_delay is calculated incorrectly below if some