doc/src/sgml/config.sgml | 15 +++++++ src/backend/access/transam/xlog.c | 29 +++++++++++++- src/backend/storage/buffer/bufmgr.c | 4 +- src/backend/storage/ipc/procsignal.c | 36 +++++++++++++++++ src/backend/storage/ipc/standby.c | 57 ++++++++++++++++++++++++++- src/backend/utils/misc/guc.c | 10 ++++- src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/storage/procsignal.h | 1 + src/include/storage/standby.h | 3 +- 9 files changed, 150 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index aca8f73a50..5580ea6978 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -6763,6 +6763,21 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' + + log_recovery_conflicts (boolean) + + log_recovery_conflicts configuration parameter + + + + + Controls whether detailed information is produced when a conflict + occurs during standby recovery. The default is on. + Only superusers can change this setting. + + + + log_parameter_max_length (integer) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 55cac186dc..95e5b0457b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -11745,10 +11745,37 @@ rm_redo_error_callback(void *arg) { XLogReaderState *record = (XLogReaderState *) arg; StringInfoData buf; + int block_id; + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blknum; + TimestampTz rtime; + bool fromStream; + char *receipt_time_str; initStringInfo(&buf); xlog_outdesc(&buf, record); - + /* + * Ensure we are in the startup process + * if we want to log standby recovery conflicts + */ + if (AmStartupProcess() && log_recovery_conflicts) + { + GetXLogReceiptTime(&rtime, &fromStream); + if (fromStream) + { + receipt_time_str = pstrdup(timestamptz_to_str(rtime)); + appendStringInfo(&buf,"\nWAL record received at %s", receipt_time_str); + for (block_id = 0; block_id <= record->max_block_id; block_id++) + { + if (XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blknum)) + appendStringInfo(&buf,"\ntbs %u db %u rel %u, fork %s, blkno %u", + rnode.spcNode, rnode.dbNode, rnode.relNode, + forkNames[forknum], + blknum); + } + } + } /* translator: %s is a WAL record description */ errcontext("WAL redo at %X/%X for %s", (uint32) (record->ReadRecPtr >> 32), diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 29c920800a..50c607e0e8 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -3780,6 +3780,7 @@ LockBufferForCleanup(Buffer buffer) { BufferDesc *bufHdr; char *new_status = NULL; + bool first_pass = true; Assert(BufferIsValid(buffer)); Assert(PinCountWaitBuf == NULL); @@ -3856,7 +3857,8 @@ LockBufferForCleanup(Buffer buffer) /* Publish the bufid that Startup process waits on */ SetStartupBufferPinWaitBufId(buffer - 1); /* Set alarm and then wait to be signaled by UnpinBuffer() */ - ResolveRecoveryConflictWithBufferPin(); + ResolveRecoveryConflictWithBufferPin(first_pass); + first_pass = false; /* Reset the published bufid */ SetStartupBufferPinWaitBufId(-1); } diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index c809196d06..f83176cfa7 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -576,3 +576,39 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) errno = save_errno; } + +extern const char *get_procsignal_reason_desc(ProcSignalReason reason) + { + const char *reasonDesc = "unknown reason"; + + switch (reason) + { + case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: + reasonDesc = "buffer pin"; + break; + case PROCSIG_RECOVERY_CONFLICT_LOCK: + reasonDesc = "lock"; + break; + case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: + reasonDesc = "tablespace"; + break; + case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: + reasonDesc = "snapshot"; + break; + case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + reasonDesc = "deadlock"; + break; + case PROCSIG_RECOVERY_CONFLICT_DATABASE: + reasonDesc = "database"; + break; + case PROCSIG_CATCHUP_INTERRUPT: + case PROCSIG_NOTIFY_INTERRUPT: + case PROCSIG_PARALLEL_MESSAGE: + case PROCSIG_WALSND_INIT_STOPPING: + case NUM_PROCSIGNALS: + break; + default: + break; + } + return reasonDesc; +} diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 9e0d5ec257..3152402660 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -39,6 +39,9 @@ int vacuum_defer_cleanup_age; int max_standby_archive_delay = 30 * 1000; int max_standby_streaming_delay = 30 * 1000; +bool log_recovery_conflicts = false; + +static TimestampTz recovery_conflicts_log_time; static HTAB *RecoveryLockLists; @@ -215,6 +218,27 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info) return false; } +/* + * Display information about the wal record + * apply being blocked + */ +static void +LogBlockedWalRecordInfo(int num_waitlist_entries, ProcSignalReason reason) +{ + if (num_waitlist_entries > 0) + if (reason == PROCSIG_RECOVERY_CONFLICT_DATABASE) + ereport(LOG, + (errmsg("recovery is experiencing recovery conflict on %s", get_procsignal_reason_desc(reason)), + errdetail("There is %d conflicting connection(s).", num_waitlist_entries))); + else + ereport(LOG, + (errmsg("recovery is waiting for recovery conflict on %s", get_procsignal_reason_desc(reason)), + errdetail("There is %d blocking connection(s).", num_waitlist_entries))); + else + ereport(LOG, + (errmsg("recovery is waiting for recovery conflict on %s", get_procsignal_reason_desc(reason)))); +} + /* * This is the main executioner for any query backend that conflicts with * recovery processing. Judgement has already been passed on it within @@ -232,6 +256,8 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, { TimestampTz waitStart = 0; char *new_status; + VirtualTransactionId *tmpWaitlist; + int num_waitlist_entries; /* Fast exit, to avoid a kernel call if there's no work to be done. */ if (!VirtualTransactionIdIsValid(*waitlist)) @@ -241,6 +267,21 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, waitStart = GetCurrentTimestamp(); new_status = NULL; /* we haven't changed the ps display */ + tmpWaitlist = waitlist; + while (VirtualTransactionIdIsValid(*tmpWaitlist)) + { + tmpWaitlist++; + } + + num_waitlist_entries = (tmpWaitlist - waitlist); + + /* display wal record information */ + if (log_recovery_conflicts && (TimestampDifferenceExceeds(recovery_conflicts_log_time, GetCurrentTimestamp(), + DeadlockTimeout))) { + LogBlockedWalRecordInfo(num_waitlist_entries, reason); + recovery_conflicts_log_time = GetCurrentTimestamp(); + } + while (VirtualTransactionIdIsValid(*waitlist)) { /* reset standbyWait_us for each xact we wait for */ @@ -370,6 +411,11 @@ ResolveRecoveryConflictWithDatabase(Oid dbid) * block during InitPostgres() and then disconnect when they see the * database has been removed. */ + + /* display wal record information */ + if (log_recovery_conflicts) + LogBlockedWalRecordInfo(CountDBBackends(dbid), PROCSIG_RECOVERY_CONFLICT_DATABASE); + while (CountDBBackends(dbid) > 0) { CancelDBBackends(dbid, PROCSIG_RECOVERY_CONFLICT_DATABASE, true); @@ -480,7 +526,7 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag) * at least deadlock_timeout. */ void -ResolveRecoveryConflictWithBufferPin(void) +ResolveRecoveryConflictWithBufferPin(bool first_pass) { TimestampTz ltime; @@ -488,6 +534,14 @@ ResolveRecoveryConflictWithBufferPin(void) ltime = GetStandbyLimitTime(); + /* display wal record information */ + if (log_recovery_conflicts && first_pass + && (TimestampDifferenceExceeds(recovery_conflicts_log_time, GetCurrentTimestamp(), + DeadlockTimeout))) { + LogBlockedWalRecordInfo(-1, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + recovery_conflicts_log_time = GetCurrentTimestamp();; + } + if (ltime == 0) { /* @@ -522,7 +576,6 @@ ResolveRecoveryConflictWithBufferPin(void) /* Wait to be signaled by UnpinBuffer() */ ProcWaitForSignal(PG_WAIT_BUFFER_PIN); - /* * Clear any timeout requests established above. We assume here that the * Startup process doesn't have any other timeouts than what this function diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 2f3e0a70e0..aa3c3c2325 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1594,7 +1594,15 @@ static struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, - + { + {"log_recovery_conflicts", PGC_SIGHUP, LOGGING_WHAT, + gettext_noop("Logs standby recovery conflicts."), + NULL + }, + &log_recovery_conflicts, + false, + NULL, NULL, NULL + }, { {"log_hostname", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Logs the host name in the connection logs."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index ac02bd0c00..40da69c07c 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -548,6 +548,7 @@ # %% = '%' # e.g. '<%u%%%d> ' #log_lock_waits = off # log lock waits >= deadlock_timeout +#log_recovery_conflicts = off # log standby recovery conflicts #log_parameter_max_length = -1 # when logging statements, limit logged # bind-parameter values to N bytes; # -1 means print in full, 0 disables diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index a0c0bc3ce5..c57413b06d 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -70,5 +70,6 @@ extern void WaitForProcSignalBarrier(uint64 generation); extern void ProcessProcSignalBarrier(void); extern void procsignal_sigusr1_handler(SIGNAL_ARGS); +extern const char *get_procsignal_reason_desc(ProcSignalReason reason); #endif /* PROCSIGNAL_H */ diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index cfbe426e5a..cd2c73a2b5 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -23,6 +23,7 @@ extern int vacuum_defer_cleanup_age; extern int max_standby_archive_delay; extern int max_standby_streaming_delay; +extern bool log_recovery_conflicts; extern void InitRecoveryTransactionEnvironment(void); extern void ShutdownRecoveryTransactionEnvironment(void); @@ -33,7 +34,7 @@ extern void ResolveRecoveryConflictWithTablespace(Oid tsid); extern void ResolveRecoveryConflictWithDatabase(Oid dbid); extern void ResolveRecoveryConflictWithLock(LOCKTAG locktag); -extern void ResolveRecoveryConflictWithBufferPin(void); +extern void ResolveRecoveryConflictWithBufferPin(bool first_pass); extern void CheckRecoveryConflictDeadlock(void); extern void StandbyDeadLockHandler(void); extern void StandbyTimeoutHandler(void);