From 77c214c9fb2fa7f9a003b96db0e5ec6506217e38 Mon Sep 17 00:00:00 2001
From: Julien Rouhaud <julien.rouhaud@free.fr>
Date: Fri, 6 Mar 2020 13:40:56 +0100
Subject: [PATCH v2 1/2] Add an optional cancel on clause to isolationtester
 steps.

Some sanity checks can require a command to wait on a lock and eventually be
cancelled.  The only way to do that was to rely on calls to pg_cancel_backend()
filtering pg_stat_activity view, but this isn't a satisfactory solution as
there's no way to guarantee that only the wanted backend will be canceled.

Instead, add a new optional "cancel on <wait_event_type> <wait_event>" clause
to the step definition.  When this clause is specified, isolationtester will
actively wait on that command, and issue a cancel when the query is waiting on
the given wait event.

Author: Julien Rouhaud
Reviewed-by:
Discussion: https://postgr.es/m/20200305035354.GQ2593%40paquier.xyz
---
 src/backend/utils/adt/lockfuncs.c    | 54 ++++++++++++++++++++++++++--
 src/include/catalog/pg_proc.dat      |  5 ++-
 src/test/isolation/README            | 12 +++++--
 src/test/isolation/isolationtester.c | 43 +++++++++++++++++-----
 src/test/isolation/isolationtester.h |  8 +++++
 src/test/isolation/specparse.y       | 20 +++++++++--
 src/test/isolation/specscanner.l     |  2 ++
 7 files changed, 127 insertions(+), 17 deletions(-)

diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index ecb1bf92ff..cc2bc94d0d 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -17,7 +17,9 @@
 #include "catalog/pg_type.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "pgstat.h"
 #include "storage/predicate_internals.h"
+#include "storage/procarray.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 
@@ -578,17 +580,28 @@ pg_safe_snapshot_blocking_pids(PG_FUNCTION_ARGS)
 Datum
 pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS)
 {
+	TupleDesc	tupdesc;
 	int			blocked_pid = PG_GETARG_INT32(0);
 	ArrayType  *interesting_pids_a = PG_GETARG_ARRAYTYPE_P(1);
 	ArrayType  *blocking_pids_a;
 	int32	   *interesting_pids;
 	int32	   *blocking_pids;
+	Datum		values[3];
+	bool		nulls[3];
+	PGPROC	   *proc;
+	uint32		raw_wait_event = 0;
+	const char *wait_event_type = NULL;
+	const char *wait_event = NULL;
 	int			num_interesting_pids;
 	int			num_blocking_pids;
 	int			dummy;
 	int			i,
 				j;
 
+	/* Initialise values and NULL flags arrays */
+	MemSet(values, 0, sizeof(values));
+	MemSet(nulls, 0, sizeof(nulls));
+
 	/* Validate the passed-in array */
 	Assert(ARR_ELEMTYPE(interesting_pids_a) == INT4OID);
 	if (array_contains_nulls(interesting_pids_a))
@@ -597,6 +610,34 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS)
 	num_interesting_pids = ArrayGetNItems(ARR_NDIM(interesting_pids_a),
 										  ARR_DIMS(interesting_pids_a));
 
+	/* Initialise attributes information in the tuple descriptor */
+	tupdesc = CreateTemplateTupleDesc(3);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "blocked",
+					   BOOLOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "wait_event_type",
+					   TEXTOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "wait_even",
+					   TEXTOID, -1, 0);
+
+	BlessTupleDesc(tupdesc);
+
+	proc = BackendPidGetProc(blocked_pid);
+	if (proc)
+	{
+#define UINT32_ACCESS_ONCE(var)		 ((uint32)(*((volatile uint32 *)&(var))))
+		raw_wait_event = UINT32_ACCESS_ONCE(proc->wait_event_info);
+		wait_event_type = pgstat_get_wait_event_type(raw_wait_event);
+		wait_event = pgstat_get_wait_event(raw_wait_event);
+
+		if (wait_event_type != NULL)
+			values[1] = CStringGetTextDatum(wait_event_type);
+		if (wait_event != NULL)
+			values[2] = CStringGetTextDatum(wait_event);
+	}
+
+	nulls[1] = (wait_event_type == NULL);
+	nulls[2] = (wait_event == NULL);
+
 	/*
 	 * Get the PIDs of all sessions blocking the given session's attempt to
 	 * acquire heavyweight locks.
@@ -623,7 +664,11 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS)
 		for (j = 0; j < num_interesting_pids; j++)
 		{
 			if (blocking_pids[i] == interesting_pids[j])
-				PG_RETURN_BOOL(true);
+			{
+				values[0] = BoolGetDatum(true);
+				PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc,
+								values, nulls)));
+			}
 		}
 
 	/*
@@ -636,9 +681,12 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS)
 	 * buffer and check if the number of safe snapshot blockers is non-zero.
 	 */
 	if (GetSafeSnapshotBlockingPids(blocked_pid, &dummy, 1) > 0)
-		PG_RETURN_BOOL(true);
+		values[0] = BoolGetDatum(true);
+
+	values[0] = BoolGetDatum(false);
 
-	PG_RETURN_BOOL(false);
+	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc,
+					values, nulls)));
 }
 
 
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 7fb574f9dc..77529e181d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5869,7 +5869,10 @@
   prosrc => 'pg_safe_snapshot_blocking_pids' },
 { oid => '3378', descr => 'isolationtester support function',
   proname => 'pg_isolation_test_session_is_blocked', provolatile => 'v',
-  prorettype => 'bool', proargtypes => 'int4 _int4',
+  prorettype => 'record', proargtypes => 'int4 _int4',
+  proallargtypes => '{int4,_int4,bool,text,text}',
+  proargmodes => '{i,i,o,o,o}',
+  proargnames => '{blocking_pid,interesting_pids,blocked,wait_event_type,wait_event}',
   prosrc => 'pg_isolation_test_session_is_blocked' },
 { oid => '1065', descr => 'view two-phase transactions',
   proname => 'pg_prepared_xact', prorows => '1000', proretset => 't',
diff --git a/src/test/isolation/README b/src/test/isolation/README
index 217953d183..04aed5cd17 100644
--- a/src/test/isolation/README
+++ b/src/test/isolation/README
@@ -86,10 +86,12 @@ session "<name>"
 
   Each step has the syntax
 
-  step "<name>" { <SQL> }
+  step "<name>" { <SQL> } [ cancel on "<wait_event_type>" "<wait_event>" ]
 
-  where <name> is a name identifying this step, and SQL is a SQL statement
-  (or statements, separated by semicolons) that is executed in the step.
+  where <name> is a name identifying this step, SQL is a SQL statement
+  (or statements, separated by semicolons) that is executed in the step and
+  <wait_event_type> and <wait_event> a wait event specification for which
+  isolationtester will cancel the query if it's blocked on it.
   Step names must be unique across the whole spec file.
 
 permutation "<step name>" ...
@@ -125,6 +127,10 @@ after PGISOLATIONTIMEOUT seconds.  If the cancel doesn't work, isolationtester
 will exit uncleanly after a total of twice PGISOLATIONTIMEOUT.  Testing
 invalid permutations should be avoided because they can make the isolation
 tests take a very long time to run, and they serve no useful testing purpose.
+If a test specified the optionnal cancel on specification, then isolationtester
+will actively wait for the step commands completion rather than continuing with
+the permutation next step, and send a cancel once the given wait event is
+blocking the query.
 
 Note that isolationtester recognizes that a command has blocked by looking
 to see if it is shown as waiting in the pg_locks view; therefore, only
diff --git a/src/test/isolation/isolationtester.c b/src/test/isolation/isolationtester.c
index f80261c022..f530d9923d 100644
--- a/src/test/isolation/isolationtester.c
+++ b/src/test/isolation/isolationtester.c
@@ -43,6 +43,7 @@ static void run_permutation(TestSpec *testspec, int nsteps, Step **steps);
 
 #define STEP_NONBLOCK	0x1		/* return 0 as soon as cmd waits for a lock */
 #define STEP_RETRY		0x2		/* this is a retry of a previously-waiting cmd */
+#define STEP_WAIT		0x4		/* active wait for a given wait event */
 static bool try_complete_step(TestSpec *testspec, Step *step, int flags);
 
 static int	step_qsort_cmp(const void *a, const void *b);
@@ -212,7 +213,7 @@ main(int argc, char **argv)
 	 */
 	initPQExpBuffer(&wait_query);
 	appendPQExpBufferStr(&wait_query,
-						 "SELECT pg_catalog.pg_isolation_test_session_is_blocked($1, '{");
+						 "SELECT * FROM pg_catalog.pg_isolation_test_session_is_blocked($1, '{");
 	/* The spec syntax requires at least one session; assume that here. */
 	appendPQExpBufferStr(&wait_query, backend_pid_strs[1]);
 	for (i = 2; i < nconns; i++)
@@ -587,8 +588,14 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps)
 			exit(1);
 		}
 
-		/* Try to complete this step without blocking.  */
-		mustwait = try_complete_step(testspec, step, STEP_NONBLOCK);
+		/*
+		 * Try to complete this step without blocking, unless the step has a
+		 * cancel on clause.
+		 */
+		mustwait = try_complete_step(testspec, step,
+				(step->waitinfo ? STEP_WAIT : STEP_NONBLOCK));
+		if (step->waitinfo)
+			report_error_message(step);
 
 		/* Check for completion of any steps that were previously waiting. */
 		w = 0;
@@ -720,10 +727,12 @@ try_complete_step(TestSpec *testspec, Step *step, int flags)
 		else if (ret == 0)		/* select() timeout: check for lock wait */
 		{
 			struct timeval current_time;
+			char  *wait_event_type = "";
+			char  *wait_event = "";
 			int64		td;
 
 			/* If it's OK for the step to block, check whether it has. */
-			if (flags & STEP_NONBLOCK)
+			if (flags & (STEP_NONBLOCK | STEP_WAIT))
 			{
 				bool		waiting;
 
@@ -738,9 +747,17 @@ try_complete_step(TestSpec *testspec, Step *step, int flags)
 					exit(1);
 				}
 				waiting = ((PQgetvalue(res, 0, 0))[0] == 't');
+				if (waiting && (flags & STEP_WAIT))
+				{
+					wait_event_type = pg_strdup(PQgetvalue(res, 0, 1));
+					Assert(wait_event_type != NULL);
+
+					wait_event = pg_strdup(PQgetvalue(res, 0, 2));
+					Assert(wait_event != NULL);
+				}
 				PQclear(res);
 
-				if (waiting)	/* waiting to acquire a lock */
+				if (waiting && (flags & STEP_NONBLOCK))	/* waiting to acquire a lock */
 				{
 					/*
 					 * Since it takes time to perform the lock-check query,
@@ -787,7 +804,11 @@ try_complete_step(TestSpec *testspec, Step *step, int flags)
 			 * failing, but remaining permutations and tests should still be
 			 * OK.
 			 */
-			if (td > max_step_wait && !canceled)
+			if ((td > max_step_wait ||
+				(step->waitinfo
+				 && strcmp(step->waitinfo->wait_event_type, wait_event_type) == 0
+				 && strcmp(step->waitinfo->wait_event, wait_event) == 0))
+				&& !canceled)
 			{
 				PGcancel   *cancel = PQgetCancel(conn);
 
@@ -801,8 +822,14 @@ try_complete_step(TestSpec *testspec, Step *step, int flags)
 						 * print to stdout not stderr, as this should appear
 						 * in the test case's results
 						 */
-						printf("isolationtester: canceling step %s after %d seconds\n",
-							   step->name, (int) (td / USECS_PER_SEC));
+						if (!step->waitinfo)
+							printf("isolationtester: canceling step %s after %d seconds\n",
+								   step->name, (int) (td / USECS_PER_SEC));
+						else
+							printf("isolationtester: canceling step %s on wait event %s/%s\n",
+									step->name,
+									step->waitinfo->wait_event_type,
+									step->waitinfo->wait_event);
 						canceled = true;
 					}
 					else
diff --git a/src/test/isolation/isolationtester.h b/src/test/isolation/isolationtester.h
index 9cf5012416..5df540485c 100644
--- a/src/test/isolation/isolationtester.h
+++ b/src/test/isolation/isolationtester.h
@@ -26,6 +26,12 @@ struct Session
 	int			nsteps;
 };
 
+typedef struct WaitInfo
+{
+	char	   *wait_event_type;
+	char	   *wait_event;
+} WaitInfo;
+
 struct Step
 {
 	int			session;
@@ -33,6 +39,8 @@ struct Step
 	char	   *name;
 	char	   *sql;
 	char	   *errormsg;
+	WaitInfo   *waitinfo;
+	struct timeval start_time;
 };
 
 typedef struct
diff --git a/src/test/isolation/specparse.y b/src/test/isolation/specparse.y
index 5e007e1bf0..c8e72f4316 100644
--- a/src/test/isolation/specparse.y
+++ b/src/test/isolation/specparse.y
@@ -27,6 +27,7 @@ TestSpec		parseresult;			/* result of parsing is left here */
 	char	   *str;
 	Session	   *session;
 	Step	   *step;
+	WaitInfo   *waitinfo;
 	Permutation *permutation;
 	struct
 	{
@@ -43,9 +44,11 @@ TestSpec		parseresult;			/* result of parsing is left here */
 %type <session> session
 %type <step> step
 %type <permutation> permutation
+%type <waitinfo> opt_cancel
 
 %token <str> sqlblock string_literal
-%token PERMUTATION SESSION SETUP STEP TEARDOWN TEST
+%token <str> LITERAL
+%token CANCEL ON PERMUTATION SESSION SETUP STEP TEARDOWN TEST
 
 %%
 
@@ -140,16 +143,29 @@ step_list:
 
 
 step:
-			STEP string_literal sqlblock
+			STEP string_literal sqlblock opt_cancel
 			{
 				$$ = pg_malloc(sizeof(Step));
 				$$->name = $2;
 				$$->sql = $3;
 				$$->used = false;
 				$$->errormsg = NULL;
+				$$->waitinfo = $4;
 			}
 		;
 
+opt_cancel:
+			CANCEL ON string_literal string_literal
+			{
+				$$ = pg_malloc(sizeof(WaitInfo));
+				$$->wait_event_type = $3;
+				$$->wait_event = $4;
+			}
+			| /* EMPTY */
+			{
+				$$ = NULL;
+			}
+		;
 
 opt_permutation_list:
 			permutation_list
diff --git a/src/test/isolation/specscanner.l b/src/test/isolation/specscanner.l
index 410f17727e..672e5fa2fc 100644
--- a/src/test/isolation/specscanner.l
+++ b/src/test/isolation/specscanner.l
@@ -48,6 +48,8 @@ comment			("#"{non_newline}*)
 	litbufsize = LITBUF_INIT;
 %}
 
+cancel			{ return CANCEL; }
+on				{ return ON; }
 permutation		{ return PERMUTATION; }
 session			{ return SESSION; }
 setup			{ return SETUP; }
-- 
2.25.1

