From 9e4be1d7031a7211e9ed497b851127ee5e58e384 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Thu, 11 Jan 2024 13:10:52 +0900
Subject: [PATCH v9 4/4] Add basic test for promotion and restart race
 condition

This test fails after 7863ee4def65 is reverted.  test_injection_points
is extended so as it is possible to add condition variables to wait for
in the point callbacks, with a SQL function to broadcast condition
variables that may be sleeping.

I guess that this should be extended so as there is more than one
condition variable stored in shmem for this module, controlling which
variable to wait for directly in the callback itself, but that's not
really necessary now.
---
 src/backend/access/transam/xlog.c             |   7 +
 .../injection_points--1.0.sql                 |  10 ++
 .../injection_points/injection_points.c       |  69 +++++++++
 src/test/recovery/meson.build                 |   1 +
 .../t/041_invalid_checkpoint_after_promote.pl | 138 ++++++++++++++++++
 5 files changed, 225 insertions(+)
 create mode 100644 src/test/recovery/t/041_invalid_checkpoint_after_promote.pl

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 478377c4a2..29e8f798f8 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -100,6 +100,7 @@
 #include "storage/sync.h"
 #include "utils/guc_hooks.h"
 #include "utils/guc_tables.h"
+#include "utils/injection_point.h"
 #include "utils/memutils.h"
 #include "utils/ps_status.h"
 #include "utils/relmapper.h"
@@ -7428,6 +7429,12 @@ CreateRestartPoint(int flags)
 
 	CheckPointGuts(lastCheckPoint.redo, flags);
 
+	/*
+	 * This location is important to be after CheckPointGuts() to ensure
+	 * that some work has happened.
+	 */
+	INJECTION_POINT("CreateRestartPoint");
+
 	/*
 	 * Remember the prior checkpoint's redo ptr for
 	 * UpdateCheckPointDistanceEstimate()
diff --git a/src/test/modules/injection_points/injection_points--1.0.sql b/src/test/modules/injection_points/injection_points--1.0.sql
index 64ff69bd7f..09ff14e24d 100644
--- a/src/test/modules/injection_points/injection_points--1.0.sql
+++ b/src/test/modules/injection_points/injection_points--1.0.sql
@@ -25,6 +25,16 @@ RETURNS void
 AS 'MODULE_PATHNAME', 'injection_points_run'
 LANGUAGE C STRICT PARALLEL UNSAFE;
 
+--
+-- injection_points_wake()
+--
+-- Wakes a condition variable executed in an injection point.
+--
+CREATE FUNCTION injection_points_wake()
+RETURNS void
+AS 'MODULE_PATHNAME', 'injection_points_wake'
+LANGUAGE C STRICT PARALLEL UNSAFE;
+
 --
 -- injection_points_detach()
 --
diff --git a/src/test/modules/injection_points/injection_points.c b/src/test/modules/injection_points/injection_points.c
index 47e7522faf..599ced22a6 100644
--- a/src/test/modules/injection_points/injection_points.c
+++ b/src/test/modules/injection_points/injection_points.c
@@ -18,6 +18,7 @@
 #include "postgres.h"
 
 #include "fmgr.h"
+#include "storage/condition_variable.h"
 #include "storage/lwlock.h"
 #include "storage/shmem.h"
 #include "utils/builtins.h"
@@ -26,10 +27,48 @@
 
 PG_MODULE_MAGIC;
 
+/* Shared state information for injection points. */
+typedef struct TestInjectionPointSharedState
+{
+	/*
+	 * Wait variable that can be registered at a given point, and that can be
+	 * awakened via SQL.
+	 */
+	ConditionVariable	wait_point;
+} TestInjectionPointSharedState;
+
+/* Pointer to shared-memory state. */
+static TestInjectionPointSharedState *inj_state = NULL;
+
+/* Wait event when waiting on condition variable */
+static uint32 injection_wait_event = 0;
+
 extern PGDLLEXPORT void injection_error(const char *name);
 extern PGDLLEXPORT void injection_notice(const char *name);
+extern PGDLLEXPORT void injection_wait(const char *name);
 
 
+static void
+injection_init_shmem(void)
+{
+	bool		found;
+
+	if (inj_state != NULL)
+		return;
+
+	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+	inj_state = ShmemInitStruct("injection_points",
+								sizeof(TestInjectionPointSharedState),
+								&found);
+	if (!found)
+	{
+		/* First time through ... */
+		MemSet(inj_state, 0, sizeof(TestInjectionPointSharedState));
+		ConditionVariableInit(&inj_state->wait_point);
+	}
+	LWLockRelease(AddinShmemInitLock);
+}
+
 /* Set of callbacks available at point creation */
 void
 injection_error(const char *name)
@@ -43,6 +82,20 @@ injection_notice(const char *name)
 	elog(NOTICE, "notice triggered for injection point %s", name);
 }
 
+void
+injection_wait(const char *name)
+{
+	if (inj_state == NULL)
+		injection_init_shmem();
+	if (injection_wait_event == 0)
+		injection_wait_event = WaitEventExtensionNew("injection_wait");
+
+	/* And sleep.. */
+	ConditionVariablePrepareToSleep(&inj_state->wait_point);
+	ConditionVariableSleep(&inj_state->wait_point, injection_wait_event);
+	ConditionVariableCancelSleep();
+}
+
 /*
  * SQL function for creating an injection point.
  */
@@ -58,6 +111,8 @@ injection_points_attach(PG_FUNCTION_ARGS)
 		function = "injection_error";
 	else if (strcmp(mode, "notice") == 0)
 		function = "injection_notice";
+	else if (strcmp(mode, "wait") == 0)
+		function = "injection_wait";
 	else
 		elog(ERROR, "incorrect mode \"%s\" for injection point creation", mode);
 
@@ -80,6 +135,20 @@ injection_points_run(PG_FUNCTION_ARGS)
 	PG_RETURN_VOID();
 }
 
+/*
+ * SQL function for waking a condition variable.
+ */
+PG_FUNCTION_INFO_V1(injection_points_wake);
+Datum
+injection_points_wake(PG_FUNCTION_ARGS)
+{
+	if (inj_state == NULL)
+		injection_init_shmem();
+
+	ConditionVariableBroadcast(&inj_state->wait_point);
+	PG_RETURN_VOID();
+}
+
 /*
  * SQL function for dropping an injection point.
  */
diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build
index 43d7411a79..c12b7fcf97 100644
--- a/src/test/recovery/meson.build
+++ b/src/test/recovery/meson.build
@@ -49,6 +49,7 @@ tests += {
       't/038_save_logical_slots_shutdown.pl',
       't/039_end_of_wal.pl',
       't/040_snapshot_status.pl',
+      't/041_invalid_checkpoint_after_promote.pl',
     ],
   },
 }
diff --git a/src/test/recovery/t/041_invalid_checkpoint_after_promote.pl b/src/test/recovery/t/041_invalid_checkpoint_after_promote.pl
new file mode 100644
index 0000000000..886454d50c
--- /dev/null
+++ b/src/test/recovery/t/041_invalid_checkpoint_after_promote.pl
@@ -0,0 +1,138 @@
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Time::HiRes qw(usleep);
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+# initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('master');
+$node_primary->init(allows_streaming => 1);
+$node_primary->append_conf(
+	'postgresql.conf', q[
+checkpoint_timeout = 30s
+log_checkpoints = on
+restart_after_crash = on
+]);
+$node_primary->start;
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# setup a standby
+my $node_standby = PostgreSQL::Test::Cluster->new('standby1');
+$node_standby->init_from_backup($node_primary, $backup_name, has_streaming => 1);
+$node_standby->start;
+
+# dummy table for the upcoming tests.
+$node_primary->safe_psql('postgres', 'checkpoint');
+$node_primary->safe_psql('postgres', 'CREATE TABLE prim_tab (a int);');
+
+# Register a injection point on the standby so as the follow-up
+# restart point running on it will wait.
+$node_primary->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+# Wait until the extension has been created on the standby
+$node_primary->wait_for_replay_catchup($node_standby);
+# This causes a restartpoint to wait on a standby.
+$node_standby->safe_psql('postgres',
+  "SELECT injection_points_attach('CreateRestartPoint', 'wait');");
+
+# Execute a restart point on the standby, that will be waited on.
+# This needs to be in the background as we'll wait on it.
+my $logstart = -s $node_standby->logfile;
+my $psql_session =
+  $node_standby->background_psql('postgres', on_error_stop => 0);
+$psql_session->query_until(qr/starting_checkpoint/, q(
+   \echo starting_checkpoint
+   CHECKPOINT;
+));
+
+# Switch one WAL segment to make the restartpoint remove it.
+$node_primary->safe_psql('postgres', 'INSERT INTO prim_tab VALUES (1);');
+$node_primary->safe_psql('postgres', 'SELECT pg_switch_wal();');
+$node_primary->wait_for_replay_catchup($node_standby);
+
+# Wait until the checkpointer is in the middle of the restartpoint
+# processing.
+ok( $node_standby->poll_query_until(
+	'postgres',
+	qq[SELECT count(*) FROM pg_stat_activity
+           WHERE backend_type = 'checkpointer' AND wait_event = 'injection_wait' ;],
+	'1'),
+    'checkpointer is waiting at restart point'
+    ) or die "Timed out while waiting for checkpointer to run restartpoint";
+
+
+# Restartpoint should have started on standby.
+my $log = slurp_file($node_standby->logfile, $logstart);
+my $checkpoint_start = 0;
+if ($log =~ m/restartpoint starting: immediate wait/)
+{
+	$checkpoint_start = 1;
+}
+is($checkpoint_start, 1, 'restartpoint has started');
+
+# promote during restartpoint
+$node_primary->stop;
+$node_standby->promote;
+
+# Update the start position before waking up the checkpointer!
+$logstart = -s $node_standby->logfile;
+
+# Now wake up the checkpointer
+$node_standby->safe_psql('postgres',
+  "SELECT injection_points_wake();");
+
+# wait until checkpoint completes on the newly-promoted standby.
+my $checkpoint_complete = 0;
+for (my $i = 0; $i < 3000; $i++)
+{
+	my $log = slurp_file($node_standby->logfile, $logstart);
+	if ($log =~ m/restartpoint complete/)
+	{
+		$checkpoint_complete = 1;
+		last;
+	}
+	usleep(100_000);
+}
+is($checkpoint_complete, 1, 'restartpoint has completed');
+
+# kill SIGKILL a backend, and all backend will restart. Note that previous
+# checkpoint has not completed.
+my $psql_timeout = IPC::Run::timer(3600);
+my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+my $killme = IPC::Run::start(
+	[ 'psql', '-XAtq', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', $node_standby->connstr('postgres') ],
+	'<',
+	\$killme_stdin,
+	'>',
+	\$killme_stdout,
+	'2>',
+	\$killme_stderr,
+	$psql_timeout);
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+$killme->pump until $killme_stdout =~ /[[:digit:]]+[\r\n]$/;
+my $pid = $killme_stdout;
+chomp($pid);
+my $ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, 'killed process with KILL');
+my $stdout;
+my $stderr;
+
+# After recovery, the server should be able to start.
+for (my $i = 0; $i < 30; $i++)
+{
+    ($ret, $stdout, $stderr) = $node_standby->psql('postgres', 'select 1');
+    last if $ret == 0;
+	sleep(1);
+}
+is($ret, 0, "psql connect success");
+is($stdout, 1, "psql select 1");
+
+done_testing();
-- 
2.43.0

