diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 31ea6ca..8e0c10c 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -5172,6 +5172,96 @@ main(int argc, char **argv)
 	return 0;
 }
 
+/* display the progress report */
+static void
+doProgressReport(TState *thread, StatsData *plast, int64 *plast_report,
+				 int64 now, int64 thread_start)
+{
+	StatsData	cur;
+	int64		run = now - *plast_report,
+				ntx;
+	double		tps,
+				total_run,
+				latency,
+				sqlat,
+				lag,
+				stdev;
+	char		tbuf[64];
+	int		i;
+
+	/*
+	 * Add up the statistics of all threads.
+	 *
+	 * XXX: No locking. There is no guarantee that we get an
+	 * atomic snapshot of the transaction count and latencies, so
+	 * these figures can well be off by a small amount. The
+	 * progress report's purpose is to give a quick overview of
+	 * how the test is going, so that shouldn't matter too much.
+	 * (If a read from a 64-bit integer is not atomic, you might
+	 * get a "torn" read and completely bogus latencies though!)
+	 */
+	initStats(&cur, 0);
+	for (i = 0; i < nthreads; i++)
+	{
+		mergeSimpleStats(&cur.latency, &thread[i].stats.latency);
+		mergeSimpleStats(&cur.lag, &thread[i].stats.lag);
+		cur.cnt += thread[i].stats.cnt;
+		cur.skipped += thread[i].stats.skipped;
+	}
+
+	/* we count only actually executed transactions */
+	ntx = (cur.cnt - cur.skipped) - (plast->cnt - plast->skipped);
+	total_run = (now - thread_start) / 1000000.0;
+	tps = 1000000.0 * ntx / run;
+	if (ntx > 0)
+	{
+		latency = 0.001 * (cur.latency.sum - plast->latency.sum) / ntx;
+		sqlat = 1.0 * (cur.latency.sum2 - plast->latency.sum2) / ntx;
+		stdev = 0.001 * sqrt(sqlat - 1000000.0 * latency * latency);
+		lag = 0.001 * (cur.lag.sum - plast->lag.sum) / ntx;
+	}
+	else
+	{
+		latency = sqlat = stdev = lag = 0;
+	}
+
+	if (progress_timestamp)
+	{
+		/*
+		 * On some platforms the current system timestamp is
+		 * available in now_time, but rather than get entangled
+		 * with that, we just eat the cost of an extra syscall in
+		 * all cases.
+		 */
+		struct timeval tv;
+
+		gettimeofday(&tv, NULL);
+		snprintf(tbuf, sizeof(tbuf), "%ld.%03ld s",
+				 (long) tv.tv_sec, (long) (tv.tv_usec / 1000));
+	}
+	else
+	{
+		/* round seconds are expected, but the thread may be late */
+		snprintf(tbuf, sizeof(tbuf), "%.1f s", total_run);
+	}
+
+	fprintf(stderr,
+			"progress: %s, %.1f tps, lat %.3f ms stddev %.3f",
+			tbuf, tps, latency, stdev);
+
+	if (throttle_delay)
+	{
+		fprintf(stderr, ", lag %.3f ms", lag);
+		if (latency_limit)
+			fprintf(stderr, ", " INT64_FORMAT " skipped",
+					cur.skipped - plast->skipped);
+	}
+	fprintf(stderr, "\n");
+
+	*plast = cur;
+	*plast_report = now;
+}
+
 static void *
 threadRun(void *arg)
 {
@@ -5432,89 +5522,8 @@ threadRun(void *arg)
 			now = INSTR_TIME_GET_MICROSEC(now_time);
 			if (now >= next_report)
 			{
-				/* generate and show report */
-				StatsData	cur;
-				int64		run = now - last_report,
-							ntx;
-				double		tps,
-							total_run,
-							latency,
-							sqlat,
-							lag,
-							stdev;
-				char		tbuf[64];
-
-				/*
-				 * Add up the statistics of all threads.
-				 *
-				 * XXX: No locking. There is no guarantee that we get an
-				 * atomic snapshot of the transaction count and latencies, so
-				 * these figures can well be off by a small amount. The
-				 * progress report's purpose is to give a quick overview of
-				 * how the test is going, so that shouldn't matter too much.
-				 * (If a read from a 64-bit integer is not atomic, you might
-				 * get a "torn" read and completely bogus latencies though!)
-				 */
-				initStats(&cur, 0);
-				for (i = 0; i < nthreads; i++)
-				{
-					mergeSimpleStats(&cur.latency, &thread[i].stats.latency);
-					mergeSimpleStats(&cur.lag, &thread[i].stats.lag);
-					cur.cnt += thread[i].stats.cnt;
-					cur.skipped += thread[i].stats.skipped;
-				}
-
-				/* we count only actually executed transactions */
-				ntx = (cur.cnt - cur.skipped) - (last.cnt - last.skipped);
-				total_run = (now - thread_start) / 1000000.0;
-				tps = 1000000.0 * ntx / run;
-				if (ntx > 0)
-				{
-					latency = 0.001 * (cur.latency.sum - last.latency.sum) / ntx;
-					sqlat = 1.0 * (cur.latency.sum2 - last.latency.sum2) / ntx;
-					stdev = 0.001 * sqrt(sqlat - 1000000.0 * latency * latency);
-					lag = 0.001 * (cur.lag.sum - last.lag.sum) / ntx;
-				}
-				else
-				{
-					latency = sqlat = stdev = lag = 0;
-				}
-
-				if (progress_timestamp)
-				{
-					/*
-					 * On some platforms the current system timestamp is
-					 * available in now_time, but rather than get entangled
-					 * with that, we just eat the cost of an extra syscall in
-					 * all cases.
-					 */
-					struct timeval tv;
-
-					gettimeofday(&tv, NULL);
-					snprintf(tbuf, sizeof(tbuf), "%ld.%03ld s",
-							 (long) tv.tv_sec, (long) (tv.tv_usec / 1000));
-				}
-				else
-				{
-					/* round seconds are expected, but the thread may be late */
-					snprintf(tbuf, sizeof(tbuf), "%.1f s", total_run);
-				}
-
-				fprintf(stderr,
-						"progress: %s, %.1f tps, lat %.3f ms stddev %.3f",
-						tbuf, tps, latency, stdev);
-
-				if (throttle_delay)
-				{
-					fprintf(stderr, ", lag %.3f ms", lag);
-					if (latency_limit)
-						fprintf(stderr, ", " INT64_FORMAT " skipped",
-								cur.skipped - last.skipped);
-				}
-				fprintf(stderr, "\n");
-
-				last = cur;
-				last_report = now;
+				doProgressReport(thread, &last,
+								 &last_report, now, thread_start);
 
 				/*
 				 * Ensure that the next report is in the future, in case
@@ -5529,6 +5538,35 @@ threadRun(void *arg)
 	}
 
 done:
+	/*
+	 * Force a (last) progress report if last one was over 0.5 second ago
+	 * or no progress was shown since the beginning of the run.
+	 */
+	if (progress && thread->tid == 0)
+	{
+		instr_time	now_time;
+		int64		now;
+
+		INSTR_TIME_SET_CURRENT(now_time);
+		now = INSTR_TIME_GET_MICROSEC(now_time);
+
+		/*
+		 * When tap testing, ensure that the run really lasts as required,
+		 * otherwise there is a shortcut under throttling when there is nothing
+		 * to do.
+		 */
+		if (getenv("PGBENCH_TAP_TEST") != NULL &&
+			duration && now - thread_start < duration * 1000000)
+		{
+			pg_usleep(duration * 1000000 + thread_start - now);
+			INSTR_TIME_SET_CURRENT(now_time);
+			now = INSTR_TIME_GET_MICROSEC(now_time);
+		}
+
+		if (last_report == thread_start || now - last_report >= 500000)
+			doProgressReport(thread, &last, &last_report, now, thread_start);
+	}
+
 	INSTR_TIME_SET_CURRENT(start);
 	disconnect_all(state, nstate);
 	INSTR_TIME_SET_CURRENT(end);
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index a8b2962..1953b69 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -4,13 +4,14 @@ use warnings;
 use PostgresNode;
 use TestLib;
 use Test::More;
+use Time::HiRes qw{time};
 
 # start a pgbench specific server
 my $node = get_new_node('main');
 $node->init;
 $node->start;
 
-# invoke pgbench
+# invoke pgbench, return elapsed time
 sub pgbench
 {
 	my ($opts, $stat, $out, $err, $name, $files) = @_;
@@ -32,10 +33,13 @@ sub pgbench
 			append_to_file($filename, $$files{$fn});
 		}
 	}
+
+	my $t0 = time();
 	$node->command_checks_all(\@cmd, $stat, $out, $err, $name);
 
 	# cleanup?
 	#unlink @filenames or die "cannot unlink files (@filenames): $!";
+	return time() - $t0;
 }
 
 # Test concurrent insertion into table with UNIQUE oid column.  DDL expects
@@ -611,14 +615,63 @@ sub check_pgbench_logs
 	ok(unlink(@logs), "remove log files");
 }
 
-# with sampling rate
+# note: --progress-timestamp is not tested
+
+# tell pgbench that it is a tap test, to trigger
+# minor behavioral change(s) which simplify some checks
+# on time-related options
+$ENV{PGBENCH_TAP_TEST} = '1';
+
+# The point of this test is coverage of various time-related features
+# (-T, -P, --aggregate-interval, --rate, --latency-limit...), so it is
+# somehow time sensitive.
+# The checks performed are quite loose so as to still pass even under
+# degraded (high load, slow host, valgrind run) testing conditions.
+# The main point is to provide coverage, most of the time.
+my $elapsed = pgbench(
+	'-T 2 -P 1 -l --log-prefix=001_pgbench_log_1 --aggregate-interval=1'
+	  . ' -S -b se@2 --rate=20 --latency-limit=1000 -j ' . $nthreads
+	  . ' -c 3 -r',
+	0,
+	[   qr{type: multiple},
+		qr{clients: 3},
+		qr{threads: $nthreads},
+		# the shown duration is really -T argument value
+		qr{duration: 2 s},
+		qr{script 1: .* select only},
+		qr{script 2: .* select only},
+		qr{statement latencies in milliseconds},
+		qr{FROM pgbench_accounts} ],
+	[	qr{vacuum},
+		qr{progress: \d\b} ],
+	'pgbench progress');
+
+# this depends on PGBENCH_TAP_TEST to avoid a shortcut under --rate
+# it there is nothing to do.
+ok($elapsed >= 2.0, "-T 2 leads to at least 2 second run");
+
+# undo tap-test behavioral change
+delete $ENV{PGBENCH_TAP_TEST};
+
+# if the test has gone AWOL, coldly skip these detailed checks.
+if (abs($elapsed - 2.0) < 0.5)
+{
+	# $nthreads threads, 2 seconds, but due to timing imprecision we might get
+	# only 1 or as many as 3 progress reports per thread.
+	check_pgbench_logs('001_pgbench_log_1', $nthreads, 1, 3,
+		qr{^\d+ \d{1,2} \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+$});
+}
+
+# with sampling rate, not time sensitive
 pgbench(
 '-n -S -t 50 -c 2 --log --log-prefix=001_pgbench_log_2 --sampling-rate=0.5',
 	0,
 	[ qr{select only}, qr{processed: 100/100} ],
-	[qr{^$}],
+	[ qr{^$} ],
 	'pgbench logs');
 
+# random 50% of 2*50 transactions, accept between 8 and 92
+# probability of failure is about 2 * 2^-42 (?)
 check_pgbench_logs('001_pgbench_log_2', 1, 8, 92,
 	qr{^0 \d{1,2} \d+ \d \d+ \d+$});
 
