From 8480fa105032e15ef76926f9247b2f5af97223d0 Mon Sep 17 00:00:00 2001
From: Julien Rouhaud <julien.rouhaud@free.fr>
Date: Fri, 6 Mar 2020 13:40:56 +0100
Subject: [PATCH 1/2] Add an optional timeout value to isolationtester steps.

Some sanity checks can require a command to wait on a lock and eventually be
cancelled.  The only way to do that was to rely on calls to pg_cancel_backend()
filtering pg_stat_activity view, but this isn't a satisfactory solution as
there's no way to guarantee that only the wanted backend will be canceled.

Instead, add a new optional "timeout val" clause to the step definition.  When
this clause is specified, isolationtester will actively wait on that command,
and issue a cancel when the given timeout is reached.

Author: Julien Rouhaud
Reviewed-by:
Discussion: https://postgr.es/m/20200305035354.GQ2593%40paquier.xyz
---
 src/test/isolation/README            | 11 ++++++--
 src/test/isolation/isolationtester.c | 42 ++++++++++++++++++++++++----
 src/test/isolation/isolationtester.h |  2 ++
 src/test/isolation/specparse.y       | 18 ++++++++++--
 src/test/isolation/specscanner.l     |  8 ++++++
 5 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/src/test/isolation/README b/src/test/isolation/README
index 217953d183..827bea0c42 100644
--- a/src/test/isolation/README
+++ b/src/test/isolation/README
@@ -86,10 +86,12 @@ session "<name>"
 
   Each step has the syntax
 
-  step "<name>" { <SQL> }
+  step "<name>" { <SQL> } [ TIMEOUT seconds ]
 
-  where <name> is a name identifying this step, and SQL is a SQL statement
-  (or statements, separated by semicolons) that is executed in the step.
+  where <name> is a name identifying this step, SQL is a SQL statement
+  (or statements, separated by semicolons) that is executed in the step and
+  seconds an optional timeout for the given SQL statement(s) to wait on before
+  canceling it.
   Step names must be unique across the whole spec file.
 
 permutation "<step name>" ...
@@ -125,6 +127,9 @@ after PGISOLATIONTIMEOUT seconds.  If the cancel doesn't work, isolationtester
 will exit uncleanly after a total of twice PGISOLATIONTIMEOUT.  Testing
 invalid permutations should be avoided because they can make the isolation
 tests take a very long time to run, and they serve no useful testing purpose.
+If a test specified the option timeout specification, then isolationtester will
+actively wait for the step commands completion rather than continuing with the
+permutation next step, and send a cancel once the given timeout is reached.
 
 Note that isolationtester recognizes that a command has blocked by looking
 to see if it is shown as waiting in the pg_locks view; therefore, only
diff --git a/src/test/isolation/isolationtester.c b/src/test/isolation/isolationtester.c
index f80261c022..dd5d335027 100644
--- a/src/test/isolation/isolationtester.c
+++ b/src/test/isolation/isolationtester.c
@@ -120,10 +120,28 @@ main(int argc, char **argv)
 	spec_yyparse();
 	testspec = &parseresult;
 
-	/* Create a lookup table of all steps. */
+	/*
+	 * Create a lookup table of all steps and validate any timeout
+	 * specification.
+	 */
 	nallsteps = 0;
 	for (i = 0; i < testspec->nsessions; i++)
+	{
 		nallsteps += testspec->sessions[i]->nsteps;
+		for (j = 0; j < testspec->sessions[i]->nsteps; j++)
+		{
+			if ((testspec->sessions[i]->steps[j]->timeout  * USECS_PER_SEC) >=
+					(max_step_wait /2))
+			{
+				fprintf(stderr, "step %s: step timeout (%d) should be less"
+						" than global timeout (%ld)",
+						testspec->sessions[i]->steps[j]->name,
+						testspec->sessions[i]->steps[j]->timeout,
+						(max_step_wait / USECS_PER_SEC));
+				exit(1);
+			}
+		}
+	}
 
 	allsteps = pg_malloc(nallsteps * sizeof(Step *));
 
@@ -587,8 +605,14 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps)
 			exit(1);
 		}
 
-		/* Try to complete this step without blocking.  */
-		mustwait = try_complete_step(testspec, step, STEP_NONBLOCK);
+		/*
+		 * Try to complete this step without blocking, unless the step has a
+		 * timeout.
+		 */
+		mustwait = try_complete_step(testspec, step,
+				(step->timeout == 0 ? STEP_NONBLOCK : 0));
+		if (step->timeout != 0)
+			report_error_message(step);
 
 		/* Check for completion of any steps that were previously waiting. */
 		w = 0;
@@ -721,6 +745,7 @@ try_complete_step(TestSpec *testspec, Step *step, int flags)
 		{
 			struct timeval current_time;
 			int64		td;
+			int64		step_timeout;
 
 			/* If it's OK for the step to block, check whether it has. */
 			if (flags & STEP_NONBLOCK)
@@ -778,6 +803,11 @@ try_complete_step(TestSpec *testspec, Step *step, int flags)
 			td *= USECS_PER_SEC;
 			td += (int64) current_time.tv_usec - (int64) start_time.tv_usec;
 
+			if (step->timeout)
+				step_timeout = step->timeout * USECS_PER_SEC;
+			else
+				step_timeout = max_step_wait;
+
 			/*
 			 * After max_step_wait microseconds, try to cancel the query.
 			 *
@@ -787,7 +817,7 @@ try_complete_step(TestSpec *testspec, Step *step, int flags)
 			 * failing, but remaining permutations and tests should still be
 			 * OK.
 			 */
-			if (td > max_step_wait && !canceled)
+			if (td > step_timeout && !canceled)
 			{
 				PGcancel   *cancel = PQgetCancel(conn);
 
@@ -812,13 +842,13 @@ try_complete_step(TestSpec *testspec, Step *step, int flags)
 			}
 
 			/*
-			 * After twice max_step_wait, just give up and die.
+			 * After twice the step timeout, just give up and die.
 			 *
 			 * Since cleanup steps won't be run in this case, this may cause
 			 * later tests to fail.  That stinks, but it's better than waiting
 			 * forever for the server to respond to the cancel.
 			 */
-			if (td > 2 * max_step_wait)
+			if (td > 2 * step_timeout)
 			{
 				fprintf(stderr, "step %s timed out after %d seconds\n",
 						step->name, (int) (td / USECS_PER_SEC));
diff --git a/src/test/isolation/isolationtester.h b/src/test/isolation/isolationtester.h
index 9cf5012416..0a0dd54ff3 100644
--- a/src/test/isolation/isolationtester.h
+++ b/src/test/isolation/isolationtester.h
@@ -33,6 +33,8 @@ struct Step
 	char	   *name;
 	char	   *sql;
 	char	   *errormsg;
+	int			timeout;
+	struct timeval start_time;
 };
 
 typedef struct
diff --git a/src/test/isolation/specparse.y b/src/test/isolation/specparse.y
index 5e007e1bf0..76842c42ab 100644
--- a/src/test/isolation/specparse.y
+++ b/src/test/isolation/specparse.y
@@ -25,6 +25,7 @@ TestSpec		parseresult;			/* result of parsing is left here */
 %union
 {
 	char	   *str;
+	int			ival;
 	Session	   *session;
 	Step	   *step;
 	Permutation *permutation;
@@ -43,9 +44,11 @@ TestSpec		parseresult;			/* result of parsing is left here */
 %type <session> session
 %type <step> step
 %type <permutation> permutation
+%type <ival> opt_timeout
 
 %token <str> sqlblock string_literal
-%token PERMUTATION SESSION SETUP STEP TEARDOWN TEST
+%token <ival> INTEGER
+%token PERMUTATION SESSION SETUP STEP TEARDOWN TEST TIMEOUT
 
 %%
 
@@ -140,16 +143,27 @@ step_list:
 
 
 step:
-			STEP string_literal sqlblock
+			STEP string_literal sqlblock opt_timeout
 			{
 				$$ = pg_malloc(sizeof(Step));
 				$$->name = $2;
 				$$->sql = $3;
 				$$->used = false;
 				$$->errormsg = NULL;
+				$$->timeout = $4;
 			}
 		;
 
+opt_timeout:
+			TIMEOUT INTEGER
+			{
+				$$ = $2;
+			}
+			| /* EMPTY */
+			{
+				$$ = 0;
+			}
+		;
 
 opt_permutation_list:
 			permutation_list
diff --git a/src/test/isolation/specscanner.l b/src/test/isolation/specscanner.l
index 410f17727e..1ec1812569 100644
--- a/src/test/isolation/specscanner.l
+++ b/src/test/isolation/specscanner.l
@@ -53,6 +53,7 @@ session			{ return SESSION; }
 setup			{ return SETUP; }
 step			{ return STEP; }
 teardown		{ return TEARDOWN; }
+timeout			{ return TIMEOUT; }
 
 [\n]			{ yyline++; }
 {comment}		{ /* ignore */ }
@@ -96,6 +97,13 @@ teardown		{ return TEARDOWN; }
 					yyerror("unterminated sql block");
 				}
 
+ /* integer */
+[0-9]+			{
+					yylval.ival = atoi(yytext);
+					return INTEGER;
+
+				}
+
 .				{
 					fprintf(stderr, "syntax error at line %d: unexpected character \"%s\"\n", yyline, yytext);
 					exit(1);
-- 
2.25.1

