doc/src/sgml/config.sgml | 22 ++++++++++ src/backend/storage/buffer/bufmgr.c | 13 ++++++ src/backend/storage/ipc/procsignal.c | 58 +++++++++++++++++++++++++++ src/backend/storage/ipc/standby.c | 57 ++++++++++++++++++++++++++ src/backend/storage/lmgr/proc.c | 9 +++++ src/backend/utils/misc/guc.c | 10 ++++- src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/storage/procsignal.h | 1 + src/include/storage/standby.h | 2 + 9 files changed, 172 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index ee914740cc..6023c7d8c0 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -6839,6 +6839,28 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' + + log_recovery_conflict_waits (boolean) + + log_recovery_conflict_waits configuration parameter + + + + + Controls whether detailed information is produced when the + startup process is resolving or waiting for recovery conflicts. + It logs only when it waits for longer than deadlock_timeout. + For the lock conflict case, it does not wait for deadlock_timeout to be reached. + + + + This parameter can only be set in the postgresql.conf + file or on the server command line. + The default is off. + + + + log_parameter_max_length (integer) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index e549fa1d30..152c2292d8 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -3808,6 +3808,8 @@ LockBufferForCleanup(Buffer buffer) { BufferDesc *bufHdr; char *new_status = NULL; + TimestampTz waitStart = 0; + bool logged; Assert(BufferIsPinned(buffer)); Assert(PinCountWaitBuf == NULL); @@ -3828,6 +3830,8 @@ LockBufferForCleanup(Buffer buffer) GetPrivateRefCount(buffer)); bufHdr = GetBufferDescriptor(buffer - 1); + waitStart = GetCurrentTimestamp(); + logged = false; for (;;) { @@ -3881,6 +3885,15 @@ LockBufferForCleanup(Buffer buffer) new_status[len] = '\0'; /* truncate off " waiting" */ } + if (log_recovery_conflict_waits && !logged + && TimestampDifferenceExceeds(waitStart, + GetCurrentTimestamp(), DeadlockTimeout)) + { + LogRecoveryConflict(NULL, + PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, true); + logged = true; + } + /* Publish the bufid that Startup process waits on */ SetStartupBufferPinWaitBufId(buffer - 1); /* Set alarm and then wait to be signaled by UnpinBuffer() */ diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index ffe67acea1..d4573a8997 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -591,3 +591,61 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) errno = save_errno; } + +extern const char *get_recovery_conflict_message(ProcSignalReason reason, bool waiting) +{ + const char *conflict_message; + if (waiting) + conflict_message = "recovery is waiting recovery conflict on unknown reason"; + else + conflict_message = "recovery is resolving recovery conflict on unknown reason"; + + switch (reason) + { + case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: + if (waiting) + conflict_message = "recovery is waiting recovery conflict on buffer pin"; + else + conflict_message = "recovery is resolving recovery conflict on buffer pin"; + break; + case PROCSIG_RECOVERY_CONFLICT_LOCK: + if (waiting) + conflict_message = "recovery is waiting recovery conflict on lock"; + else + conflict_message = "recovery is resolving recovery conflict on lock"; + break; + case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: + if (waiting) + conflict_message = "recovery is waiting recovery conflict on tablespace"; + else + conflict_message = "recovery is resolving recovery conflict on tablespace"; + break; + case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: + if (waiting) + conflict_message = "recovery is waiting recovery conflict on snapshot"; + else + conflict_message = "recovery is resolving recovery conflict on snapshot"; + break; + case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + if (waiting) + conflict_message = "recovery is waiting recovery conflict on deadlock"; + else + conflict_message = "recovery is resolving recovery conflict on deadlock"; + break; + case PROCSIG_RECOVERY_CONFLICT_DATABASE: + if (waiting) + conflict_message = "recovery is waiting recovery conflict on database"; + else + conflict_message = "recovery is resolving recovery conflict on database"; + break; + case PROCSIG_CATCHUP_INTERRUPT: + case PROCSIG_NOTIFY_INTERRUPT: + case PROCSIG_PARALLEL_MESSAGE: + case PROCSIG_WALSND_INIT_STOPPING: + case NUM_PROCSIGNALS: + break; + default: + break; + } + return conflict_message; +} diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 52b2809dac..6800eb0c11 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -39,6 +39,7 @@ int vacuum_defer_cleanup_age; int max_standby_archive_delay = 30 * 1000; int max_standby_streaming_delay = 30 * 1000; +bool log_recovery_conflict_waits = false; static HTAB *RecoveryLockLists; @@ -215,6 +216,48 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info) return false; } +/* + * Log the recovery conflict + */ +void +LogRecoveryConflict(VirtualTransactionId *waitlist, ProcSignalReason reason, bool waiting) +{ + if (waitlist && waiting) + { + VirtualTransactionId *vxids; + StringInfoData buf; + int count; + + count = 0; + vxids = waitlist; + initStringInfo(&buf); + + while (VirtualTransactionIdIsValid(*vxids)) + { + count++; + if (count > 1) + appendStringInfo(&buf, ", "); + + appendStringInfo(&buf, "%d/%u", vxids->backendId, + vxids->localTransactionId); + vxids++; + } + ereport(LOG, + (errmsg("%s", + get_recovery_conflict_message(reason, waiting)), + (errdetail_log_plural("Conflicting virtual transaction id: %s.", + "Conflicting virtual transaction ids: %s.", + count, buf.data)))); + pfree(buf.data); + } + else if (!waitlist) + { + ereport(LOG, + (errmsg("%s", + get_recovery_conflict_message(reason, waiting)))); + } +} + /* * This is the main executioner for any query backend that conflicts with * recovery processing. Judgement has already been passed on it within @@ -232,6 +275,7 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, { TimestampTz waitStart = 0; char *new_status; + bool logged = false; /* Fast exit, to avoid a kernel call if there's no work to be done. */ if (!VirtualTransactionIdIsValid(*waitlist)) @@ -249,6 +293,14 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, /* wait until the virtual xid is gone */ while (!VirtualXactLock(*waitlist, false)) { + /* Log the recovery conflict */ + if (log_recovery_conflict_waits && !logged + && TimestampDifferenceExceeds(waitStart, + GetCurrentTimestamp(), DeadlockTimeout)) + { + LogRecoveryConflict(waitlist, reason, report_waiting); + logged = true; + } /* * Report via ps if we have been waiting for more than 500 msec * (should that be configurable?) @@ -370,6 +422,11 @@ ResolveRecoveryConflictWithDatabase(Oid dbid) * block during InitPostgres() and then disconnect when they see the * database has been removed. */ + + /* Log the recovery conflict */ + if (log_recovery_conflict_waits) + LogRecoveryConflict(NULL, PROCSIG_RECOVERY_CONFLICT_DATABASE, false); + while (CountDBBackends(dbid) > 0) { CancelDBBackends(dbid, PROCSIG_RECOVERY_CONFLICT_DATABASE, true); diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 88566bd9fa..8cbd3b39a3 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -1069,6 +1069,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) PGPROC *proc; PGPROC *leader = MyProc->lockGroupLeader; int i; + bool logged; /* * If group locking is in use, locks held by members of my locking group @@ -1275,11 +1276,19 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) * timeout and updating the locallock table, but if we lose control to an * error, LockErrorCleanup will fix that up. */ + logged = false; do { if (InHotStandby) { /* Set a timer and wait for that or for the Lock to be granted */ + VirtualTransactionId *backends; + backends = GetLockConflicts(&locallock->tag.lock, AccessExclusiveLock, NULL); + if (log_recovery_conflict_waits && !logged) { + LogRecoveryConflict(backends, PROCSIG_RECOVERY_CONFLICT_LOCK, true); + logged = true; + } + ResolveRecoveryConflictWithLock(locallock->tag.lock); } else diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 596bcb7b84..e94aa79418 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1570,7 +1570,15 @@ static struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, - + { + {"log_recovery_conflict_waits", PGC_SIGHUP, LOGGING_WHAT, + gettext_noop("Logs standby recovery conflicts resolution."), + NULL + }, + &log_recovery_conflict_waits, + false, + NULL, NULL, NULL + }, { {"log_hostname", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Logs the host name in the connection logs."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 9cb571f7cc..c2e1af714d 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -553,6 +553,7 @@ # %% = '%' # e.g. '<%u%%%d> ' #log_lock_waits = off # log lock waits >= deadlock_timeout +#log_recovery_conflict_waits = off # log standby recovery conflicts #log_parameter_max_length = -1 # when logging statements, limit logged # bind-parameter values to N bytes; # -1 means print in full, 0 disables diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index 5cb39697f3..cf903850c4 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -71,5 +71,6 @@ extern void WaitForProcSignalBarrier(uint64 generation); extern void ProcessProcSignalBarrier(void); extern void procsignal_sigusr1_handler(SIGNAL_ARGS); +extern const char *get_recovery_conflict_message(ProcSignalReason reason, bool waiting); #endif /* PROCSIGNAL_H */ diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index faaf1d3817..8e3b02f7be 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -23,6 +23,7 @@ extern int vacuum_defer_cleanup_age; extern int max_standby_archive_delay; extern int max_standby_streaming_delay; +extern bool log_recovery_conflict_waits; extern void InitRecoveryTransactionEnvironment(void); extern void ShutdownRecoveryTransactionEnvironment(void); @@ -38,6 +39,7 @@ extern void CheckRecoveryConflictDeadlock(void); extern void StandbyDeadLockHandler(void); extern void StandbyTimeoutHandler(void); extern void StandbyLockTimeoutHandler(void); +extern void LogRecoveryConflict(VirtualTransactionId *waitlist, ProcSignalReason reason, bool waiting); /* * Standby Rmgr (RM_STANDBY_ID)