Author: Noah Misch Commit: Noah Misch Not for commit; for demonstration only. Before applying this, apply https://postgr.es/m/CAPpHfdtSEOHX8dSk9Qp%2BZ%2B%2Bi4BGQoffKip6JDWngEA%2Bg7Z-XmQ%40mail.gmail.com diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 199d911..1c0a988 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -71,6 +71,7 @@ #include "storage/reinit.h" #include "storage/smgr.h" #include "storage/spin.h" +#include "storage/stopevent.h" #include "storage/sync.h" #include "utils/builtins.h" #include "utils/guc.h" @@ -3583,6 +3584,23 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, elog(ERROR, "InstallXLogFileSegment should not have failed"); } +static Jsonb * +InstallXLogFileSegmentEndStopEventParams(XLogSegNo segno) +{ + MemoryContext oldCxt; + JsonbParseState *state = NULL; + Jsonb *res; + + stopevents_make_cxt(); + oldCxt = MemoryContextSwitchTo(stopevents_cxt); + pushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL); + jsonb_push_int8_key(&state, "segno", segno); + res = JsonbValueToJsonb(pushJsonbValue(&state, WJB_END_OBJECT, NULL)); + MemoryContextSwitchTo(oldCxt); + + return res; +} + /* * Install a new XLOG segment file as a current or future log segment. * @@ -3664,6 +3682,9 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, if (use_lock) LWLockRelease(ControlFileLock); + STOPEVENT(InstallXLogFileSegmentEndStopEvent, + InstallXLogFileSegmentEndStopEventParams(*segno)); + return true; } diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c index 1c5a4f8..d9c5a3a 100644 --- a/src/backend/access/transam/xlogarchive.c +++ b/src/backend/access/transam/xlogarchive.c @@ -30,6 +30,7 @@ #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/pmsignal.h" +#include "storage/stopevent.h" /* * Attempt to retrieve the specified file from off-line archival storage. @@ -372,6 +373,23 @@ ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOn } +static Jsonb * +KeepFileRestoredFromArchiveStopEventParams(const char *xlogfname) +{ + MemoryContext oldCxt; + JsonbParseState *state = NULL; + Jsonb *res; + + stopevents_make_cxt(); + oldCxt = MemoryContextSwitchTo(stopevents_cxt); + pushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL); + jsonb_push_string_key(&state, "xlogfname", xlogfname); + res = JsonbValueToJsonb(pushJsonbValue(&state, WJB_END_OBJECT, NULL)); + MemoryContextSwitchTo(oldCxt); + + return res; +} + /* * A file was restored from the archive under a temporary filename (path), * and now we want to keep it. Rename it under the permanent filename in @@ -386,6 +404,9 @@ KeepFileRestoredFromArchive(const char *path, const char *xlogfname) snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname); + STOPEVENT(KeepFileRestoredFromArchiveStopEvent, + KeepFileRestoredFromArchiveStopEventParams(xlogfname)); + if (stat(xlogfpath, &statbuf) == 0) { char oldpath[MAXPGPATH]; @@ -424,6 +445,9 @@ KeepFileRestoredFromArchive(const char *path, const char *xlogfname) reload = true; } + STOPEVENT(KeepFileRestoredFromArchivePreRenameStopEvent, + KeepFileRestoredFromArchiveStopEventParams(xlogfname)); + durable_rename(path, xlogfpath, ERROR); /* diff --git a/src/backend/storage/lmgr/stopevent.c b/src/backend/storage/lmgr/stopevent.c index 6b6eb2b..251bdfe 100644 --- a/src/backend/storage/lmgr/stopevent.c +++ b/src/backend/storage/lmgr/stopevent.c @@ -280,12 +280,14 @@ handle_stopevent(int event_id, Jsonb *params) if (trace_stopevents) { char *params_string; + MemoryContext oldCxt; + oldCxt = MemoryContextSwitchTo(stopevents_cxt); params_string = DatumGetCString(DirectFunctionCall1(jsonb_out, PointerGetDatum(params))); elog(DEBUG2, "stop event \"%s\", params \"%s\"", stopeventnames[event_id], params_string); - pfree(params_string); + MemoryContextSwitchTo(oldCxt); } MemoryContextReset(stopevents_cxt); @@ -342,7 +344,12 @@ void relation_stopevent_params(JsonbParseState **state, Relation relation) { jsonb_push_int8_key(state, "datoid", MyDatabaseId); +#if 0 + /* infinite recursion to segfault */ jsonb_push_string_key(state, "datname", get_database_name(MyDatabaseId)); +#else + jsonb_push_string_key(state, "datname", "fake"); +#endif jsonb_push_int8_key(state, "reloid", relation->rd_id); jsonb_push_string_key(state, "relname", NameStr(relation->rd_rel->relname)); } diff --git a/src/backend/storage/lmgr/stopeventnames.txt b/src/backend/storage/lmgr/stopeventnames.txt index b5330a1..aadff7a 100644 --- a/src/backend/storage/lmgr/stopeventnames.txt +++ b/src/backend/storage/lmgr/stopeventnames.txt @@ -1,2 +1,5 @@ +InstallXLogFileSegmentEndStopEvent +KeepFileRestoredFromArchiveStopEvent +KeepFileRestoredFromArchivePreRenameStopEvent ReleaseAndReadBufferStopEvent EntryFindPostingLeafPageStopEvent diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 84472b9..4c217d5 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -23,6 +23,7 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "storage/stopevent.h" #include "utils/memdebug.h" #include "utils/memutils.h" @@ -126,6 +127,10 @@ MemoryContextInit(void) 8 * 1024, 8 * 1024); MemoryContextAllowInCriticalSection(ErrorContext, true); + + stopevents_make_cxt(); + /* not really okay; for a demo patch, I'll take my chances */ + MemoryContextAllowInCriticalSection(stopevents_cxt, true); } /* diff --git a/src/include/storage/stopevent.h b/src/include/storage/stopevent.h index a8b62f2..53dbf98 100644 --- a/src/include/storage/stopevent.h +++ b/src/include/storage/stopevent.h @@ -2,6 +2,7 @@ #define SRC_STOPEVENT_H #include "utils/jsonb.h" +#include "utils/rel.h" #include "storage/stopeventnames.h" extern bool enable_stopevents; diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 9667f76..caa6de5 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -1737,7 +1737,7 @@ sub poll_query_until my $cmd = [ 'psql', '-XAt', '-c', $query, '-d', $self->connstr($dbname) ]; my ($stdout, $stderr); - my $max_attempts = 180 * 10; + my $max_attempts = 3 * 10; my $attempts = 0; while ($attempts < $max_attempts) diff --git a/src/test/recovery/t/022_install_xlog.pl b/src/test/recovery/t/022_install_xlog.pl new file mode 100644 index 0000000..16d8a22 --- /dev/null +++ b/src/test/recovery/t/022_install_xlog.pl @@ -0,0 +1,96 @@ +# +# Test InstallXLogFileSegment()/KeepFileRestoredFromArchive() mutual exclusion +# +# Specifically, recreate this sequence of events: +# +# checkpointer enters XLogFileInit, finds no segno:2 file +# checkpointer finishes InstallXLogFileSegment, creating segno:2 file +# startupProc enters KeepFileRestoredFromArchive, unlinks segno:2 +# checkpointer enters the last BasicOpenFile of XLogFileInit, which fails +# startupProc finishes KeepFileRestoredFromArchive (too late) +# +use strict; +use warnings; +use PostgresNode; +use Test::More tests => 1; + +my $primary = get_new_node('primary'); +$primary->init( + has_archiving => 1, + allows_streaming => 1); +$primary->append_conf('postgresql.conf', 'autovacuum = off'); +$primary->start; +$primary->stop('immediate'); +$primary->backup_fs_cold('backup'); +$primary->start; + +# Standby will get these two segments from the archive. +$primary->safe_psql( + 'postgres', q{ + -- Standby makes a restart point from this checkpoint. + CREATE TABLE mine AS SELECT generate_series(1,10) AS x; + CHECKPOINT; + SELECT pg_switch_wal(); + -- Standby restores this segment in parallel with its restart point, + -- exercising the race condition. + INSERT INTO mine SELECT generate_series(10,20) AS x; + SELECT pg_switch_wal(); +}); + + +my $standby = get_new_node('standby'); +$standby->init_from_backup($primary, 'backup', has_restoring => 1); +# exercise PreallocXlogFiles(), not recycling +$standby->append_conf('postgresql.conf', 'wal_keep_size = 160MB'); +# break restore_command so we can first inject stop events +$standby->append_conf('postgresql.conf', 'restore_command = false'); +$standby->start; + +# register all stop events +$standby->safe_psql( + 'postgres', q{ + select pg_stopevent_set('InstallXLogFileSegmentEnd', '$.segno == 2'); + select pg_stopevent_set('KeepFileRestoredFromArchive', + '$.xlogfname == "000000010000000000000002"'); + select pg_stopevent_set('KeepFileRestoredFromArchivePreRename', + '$.xlogfname == "000000010000000000000002"'); +}); +# repair restore_command +$standby->enable_restoring($primary, 1); +$standby->safe_psql('postgres', q{SELECT pg_reload_conf();}); + + +# poll until some process is blocked as a pg_stopevent_set() directed +sub poll_until_event +{ + my $event = shift; + $standby->poll_query_until( + 'postgres', + qq{ SELECT true FROM pg_stopevents() WHERE cardinality(waiters) <> 0 + AND event_name = '$event'; } + ) or die "Timed out while waiting for $event on standby"; +} + +poll_until_event 'KeepFileRestoredFromArchive'; # segno==2 restored + +# Begin a restartpoint, pausing at CreateRestartPoint -> PreallocXlogFiles -> +# XLogFileInit -> InstallXLogFileSegment -> InstallXLogFileSegmentEnd. +my $in = ''; +my $out = ''; +my $timer = IPC::Run::timeout(180); +my $h = $standby->background_psql('postgres', \$in, \$out, $timer); +$in .= "CHECKPOINT;\n"; +$h->pump_nb; +poll_until_event 'InstallXLogFileSegmentEnd'; + +# release startupProc to unlink segno==2 +$standby->safe_psql('postgres', + q{SELECT pg_stopevent_reset('KeepFileRestoredFromArchive');}); +poll_until_event 'KeepFileRestoredFromArchivePreRename'; +# release checkpointer to open segno==2 +$standby->safe_psql('postgres', + q{SELECT pg_stopevent_reset('InstallXLogFileSegmentEnd');}); +ok($h->finish, 'CHECKPOINT completed'); +# release startupProc to rename file onto segno==2, but it's too late +$standby->safe_psql('postgres', + q{SELECT pg_stopevent_reset('KeepFileRestoredFromArchivePreRename');});