doc/src/sgml/config.sgml | 15 ++++++ src/backend/storage/ipc/procsignal.c | 36 +++++++++++++ src/backend/storage/ipc/standby.c | 77 +++++++++++++++++++++++++++ src/backend/utils/misc/guc.c | 10 +++- src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/storage/procsignal.h | 1 + src/include/storage/standby.h | 1 + 7 files changed, 140 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index aca8f73a50..5580ea6978 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -6763,6 +6763,21 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' + + log_recovery_conflicts (boolean) + + log_recovery_conflicts configuration parameter + + + + + Controls whether detailed information is produced when a conflict + occurs during standby recovery. The default is on. + Only superusers can change this setting. + + + + log_parameter_max_length (integer) diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index c809196d06..f83176cfa7 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -576,3 +576,39 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) errno = save_errno; } + +extern const char *get_procsignal_reason_desc(ProcSignalReason reason) + { + const char *reasonDesc = "unknown reason"; + + switch (reason) + { + case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: + reasonDesc = "buffer pin"; + break; + case PROCSIG_RECOVERY_CONFLICT_LOCK: + reasonDesc = "lock"; + break; + case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: + reasonDesc = "tablespace"; + break; + case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: + reasonDesc = "snapshot"; + break; + case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + reasonDesc = "deadlock"; + break; + case PROCSIG_RECOVERY_CONFLICT_DATABASE: + reasonDesc = "database"; + break; + case PROCSIG_CATCHUP_INTERRUPT: + case PROCSIG_NOTIFY_INTERRUPT: + case PROCSIG_PARALLEL_MESSAGE: + case PROCSIG_WALSND_INIT_STOPPING: + case NUM_PROCSIGNALS: + break; + default: + break; + } + return reasonDesc; +} diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 9e0d5ec257..126858a363 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -39,6 +39,7 @@ int vacuum_defer_cleanup_age; int max_standby_archive_delay = 30 * 1000; int max_standby_streaming_delay = 30 * 1000; +bool log_recovery_conflicts = true; static HTAB *RecoveryLockLists; @@ -215,6 +216,59 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info) return false; } +/* + * Display information about the wal record + * apply being blocked + */ +static void +LogBlockedWalRecordInfo(int num_waitlist_entries, ProcSignalReason reason) +{ + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blknum; + TimestampTz rtime; + bool fromStream; + char *receipt_time_str; + char *wal_record_detail_str; + char *recovery_conflict_main_message; + ErrorContextCallback *econtext; + MemoryContext oldcontext; + + recovery_conflict_main_message = psprintf("waiting for recovery conflict on %s", + get_procsignal_reason_desc(reason)); + + GetXLogReceiptTime(&rtime, &fromStream); + receipt_time_str = pstrdup(timestamptz_to_str(rtime)); + + + oldcontext = MemoryContextSwitchTo(ErrorContext); + econtext = error_context_stack; + + if (XLogRecGetBlockTag(econtext->arg, 0, &rnode, &forknum, &blknum)) + wal_record_detail_str = psprintf("WAL record received at %s.\nTablespace/database/relation are %u/%u/%u, fork is %s and block is %u.", + receipt_time_str, rnode.spcNode, rnode.dbNode, rnode.relNode, + forkNames[forknum], + blknum); + else + wal_record_detail_str = psprintf("WAL record received at %s.", receipt_time_str); + + MemoryContextSwitchTo(oldcontext); + + if (num_waitlist_entries > 0) + ereport(LOG, + (errmsg("%s", recovery_conflict_main_message), + errdetail("%s\n" "There is %d blocking connection(s).", wal_record_detail_str, num_waitlist_entries))); + else + ereport(LOG, + (errmsg("%s", recovery_conflict_main_message), + errdetail("%s", wal_record_detail_str))); + + + pfree(recovery_conflict_main_message); + pfree(wal_record_detail_str); + pfree(receipt_time_str); +} + /* * This is the main executioner for any query backend that conflicts with * recovery processing. Judgement has already been passed on it within @@ -232,6 +286,8 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, { TimestampTz waitStart = 0; char *new_status; + VirtualTransactionId *tmpWaitlist; + int num_waitlist_entries; /* Fast exit, to avoid a kernel call if there's no work to be done. */ if (!VirtualTransactionIdIsValid(*waitlist)) @@ -241,6 +297,18 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, waitStart = GetCurrentTimestamp(); new_status = NULL; /* we haven't changed the ps display */ + tmpWaitlist = waitlist; + while (VirtualTransactionIdIsValid(*tmpWaitlist)) + { + tmpWaitlist++; + } + + num_waitlist_entries = (tmpWaitlist - waitlist); + + /* display wal record information */ + if (log_recovery_conflicts) + LogBlockedWalRecordInfo(num_waitlist_entries, reason); + while (VirtualTransactionIdIsValid(*waitlist)) { /* reset standbyWait_us for each xact we wait for */ @@ -370,6 +438,11 @@ ResolveRecoveryConflictWithDatabase(Oid dbid) * block during InitPostgres() and then disconnect when they see the * database has been removed. */ + + /* display wal record information */ + if (log_recovery_conflicts) + LogBlockedWalRecordInfo(CountDBBackends(dbid), PROCSIG_RECOVERY_CONFLICT_DATABASE); + while (CountDBBackends(dbid) > 0) { CancelDBBackends(dbid, PROCSIG_RECOVERY_CONFLICT_DATABASE, true); @@ -609,6 +682,10 @@ StandbyTimeoutHandler(void) /* forget any pending STANDBY_DEADLOCK_TIMEOUT request */ disable_timeout(STANDBY_DEADLOCK_TIMEOUT, false); + /* display wal record information */ + if (log_recovery_conflicts) + LogBlockedWalRecordInfo(-1, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 2f3e0a70e0..7aa55db5af 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1594,7 +1594,15 @@ static struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, - + { + {"log_recovery_conflicts", PGC_SUSET, LOGGING_WHAT, + gettext_noop("Logs standby recovery conflicts."), + NULL + }, + &log_recovery_conflicts, + true, + NULL, NULL, NULL + }, { {"log_hostname", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Logs the host name in the connection logs."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index ac02bd0c00..1009506eff 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -548,6 +548,7 @@ # %% = '%' # e.g. '<%u%%%d> ' #log_lock_waits = off # log lock waits >= deadlock_timeout +#log_recovery_conflicts = on # log standby recovery conflicts #log_parameter_max_length = -1 # when logging statements, limit logged # bind-parameter values to N bytes; # -1 means print in full, 0 disables diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index a0c0bc3ce5..c57413b06d 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -70,5 +70,6 @@ extern void WaitForProcSignalBarrier(uint64 generation); extern void ProcessProcSignalBarrier(void); extern void procsignal_sigusr1_handler(SIGNAL_ARGS); +extern const char *get_procsignal_reason_desc(ProcSignalReason reason); #endif /* PROCSIGNAL_H */ diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index cfbe426e5a..0a99ad3f40 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -23,6 +23,7 @@ extern int vacuum_defer_cleanup_age; extern int max_standby_archive_delay; extern int max_standby_streaming_delay; +extern bool log_recovery_conflicts; extern void InitRecoveryTransactionEnvironment(void); extern void ShutdownRecoveryTransactionEnvironment(void);