doc/src/sgml/config.sgml | 15 ++++++
src/backend/access/transam/xlog.c | 3 ++
src/backend/postmaster/pgstat.c | 31 +++++++++++
src/backend/storage/ipc/procsignal.c | 36 +++++++++++++
src/backend/storage/ipc/standby.c | 76 +++++++++++++++++++++++++++
src/backend/tcop/postgres.c | 54 +++++++++++++++++++
src/backend/utils/misc/guc.c | 10 +++-
src/backend/utils/misc/postgresql.conf.sample | 1 +
src/include/access/xlog.h | 1 +
src/include/pgstat.h | 1 +
src/include/storage/procsignal.h | 1 +
src/include/storage/standby.h | 1 +
src/include/tcop/tcopprot.h | 1 +
13 files changed, 230 insertions(+), 1 deletion(-)
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index aca8f73a50..5580ea6978 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6763,6 +6763,21 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
+
+ log_recovery_conflicts (boolean)
+
+ log_recovery_conflicts configuration parameter
+
+
+
+
+ Controls whether detailed information is produced when a conflict
+ occurs during standby recovery. The default is on.
+ Only superusers can change this setting.
+
+
+
+
log_parameter_max_length (integer)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 55cac186dc..d3f923f951 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -880,6 +880,8 @@ static bool holdingAllLocks = false;
#ifdef WAL_DEBUG
static MemoryContext walDebugCxt = NULL;
#endif
+/* For LogBlockedWalRecordInfo */
+XLogReaderState *current_replayed_xlog = NULL;
static void readRecoverySignalFile(void);
static void validateRecoveryParameters(void);
@@ -7319,6 +7321,7 @@ StartupXLOG(void)
TransactionIdIsValid(record->xl_xid))
RecordKnownAssignedTransactionIds(record->xl_xid);
+ current_replayed_xlog = xlogreader;
/* Now apply the WAL record itself */
RmgrTable[record->xl_rmid].rm_redo(xlogreader);
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 309378ae54..513c9cd643 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -6633,6 +6633,37 @@ pgstat_db_requested(Oid databaseid)
return false;
}
+/* ----------
+ * pgstat_get_state_name() -
+ *
+ * Return a string representing the current backend state.
+ * ----------
+ */
+const char *
+pgstat_get_state_name(BackendState backend_state)
+{
+ switch (backend_state)
+ {
+ case STATE_IDLE:
+ return "idle";
+ case STATE_RUNNING:
+ return "active";
+ case STATE_IDLEINTRANSACTION:
+ return "idle in transaction";
+ case STATE_FASTPATH:
+ return "fastpath function call";
+ case STATE_IDLEINTRANSACTION_ABORTED:
+ return "idle in transaction (aborted)";
+ case STATE_DISABLED:
+ return "disabled";
+ case STATE_UNDEFINED:
+ return "";
+ default:
+ Assert(0);
+ }
+ return "";
+}
+
/*
* Convert a potentially unsafely truncated activity string (see
* PgBackendStatus.st_activity_raw's documentation) into a correctly truncated
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index c809196d06..2d5886ff80 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -576,3 +576,39 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
errno = save_errno;
}
+
+extern const char *get_procsignal_reason_desc(ProcSignalReason reason)
+ {
+ const char *reasonDesc = "unknown reason";
+
+ switch (reason)
+ {
+ case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
+ reasonDesc = "User was holding shared buffer pin for too long.";
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_LOCK:
+ reasonDesc = "User was holding a relation lock for too long.";
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
+ reasonDesc = "User was or might have been using tablespace that must be dropped.";
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
+ reasonDesc = "User query might have needed to see row versions that must be removed.";
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ reasonDesc = "User transaction caused buffer deadlock with recovery.";
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_DATABASE:
+ reasonDesc = "User was connected to a database that must be dropped.";
+ break;
+ case PROCSIG_CATCHUP_INTERRUPT:
+ case PROCSIG_NOTIFY_INTERRUPT:
+ case PROCSIG_PARALLEL_MESSAGE:
+ case PROCSIG_WALSND_INIT_STOPPING:
+ case NUM_PROCSIGNALS:
+ break;
+ default:
+ break;
+ }
+ return reasonDesc;
+}
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 9e0d5ec257..2ec2b37fcf 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -16,6 +16,7 @@
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include "access/rmgr.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
@@ -34,6 +35,7 @@
#include "utils/ps_status.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
+#include "access/xlog_internal.h"
/* User-settable GUC parameters */
int vacuum_defer_cleanup_age;
@@ -215,6 +217,57 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
return false;
}
+/*
+ * Display information about the wal record
+ * apply being blocked
+ */
+static void
+LogBlockedWalRecordInfo(int num_waitlist_entries, ProcSignalReason reason)
+{
+ RelFileNode rnode;
+ ForkNumber forknum;
+ BlockNumber blknum;
+ uint64 wal_record_lsn;
+ TimestampTz rtime;
+ bool fromStream;
+ char *receipt_time_str;
+ RmgrId rmgrId = -1;
+ uint8 info = 0;
+ const char *rm_name;
+ const char *rm_info;
+ char *wal_record_info_str;
+
+ if (num_waitlist_entries > 0)
+ ereport(LOG, (errmsg("wal record apply is blocked by %d connection(s), reason: %s",
+ num_waitlist_entries, get_procsignal_reason_desc(reason))));
+ else
+ ereport(LOG, (errmsg("wal record apply is blocked, reason: %s",
+ get_procsignal_reason_desc(reason))));
+
+ wal_record_lsn = current_replayed_xlog->ReadRecPtr;
+ rmgrId = XLogRecGetRmid(current_replayed_xlog);
+ info = XLogRecGetInfo(current_replayed_xlog) & ~XLR_INFO_MASK;
+
+ rm_name = RmgrTable[rmgrId].rm_name;
+ rm_info = RmgrTable[rmgrId].rm_identify(info);
+
+ GetXLogReceiptTime(&rtime, &fromStream);
+ receipt_time_str = pstrdup(timestamptz_to_str(rtime));
+ wal_record_info_str = psprintf("blocked wal record rmgr: %s, lsn: %X/%08X, received at: %s, desc: %s", rm_name, (uint32) (wal_record_lsn >> 32), (uint32) wal_record_lsn, receipt_time_str, rm_info);
+
+ if (XLogRecGetBlockTag(current_replayed_xlog, 0, &rnode, &forknum, &blknum))
+ ereport(LOG, (errmsg("%s, relation: rel %u/%u/%u fork %s blk %u",
+ wal_record_info_str,
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ forkNames[forknum],
+ blknum)));
+ else
+ ereport(LOG, (errmsg("%s", wal_record_info_str)));
+
+ pfree(wal_record_info_str);
+ pfree(receipt_time_str);
+}
+
/*
* This is the main executioner for any query backend that conflicts with
* recovery processing. Judgement has already been passed on it within
@@ -232,6 +285,8 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
{
TimestampTz waitStart = 0;
char *new_status;
+ VirtualTransactionId *tmpWaitlist;
+ int num_waitlist_entries;
/* Fast exit, to avoid a kernel call if there's no work to be done. */
if (!VirtualTransactionIdIsValid(*waitlist))
@@ -241,6 +296,18 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
waitStart = GetCurrentTimestamp();
new_status = NULL; /* we haven't changed the ps display */
+ tmpWaitlist = waitlist;
+ while (VirtualTransactionIdIsValid(*tmpWaitlist))
+ {
+ tmpWaitlist++;
+ }
+
+ num_waitlist_entries = (tmpWaitlist - waitlist);
+
+ /* display wal record information */
+ if (log_recovery_conflicts)
+ LogBlockedWalRecordInfo(num_waitlist_entries, reason);
+
while (VirtualTransactionIdIsValid(*waitlist))
{
/* reset standbyWait_us for each xact we wait for */
@@ -370,6 +437,11 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
* block during InitPostgres() and then disconnect when they see the
* database has been removed.
*/
+
+ /* display wal record information */
+ if (log_recovery_conflicts)
+ LogBlockedWalRecordInfo(CountDBBackends(dbid), PROCSIG_RECOVERY_CONFLICT_DATABASE);
+
while (CountDBBackends(dbid) > 0)
{
CancelDBBackends(dbid, PROCSIG_RECOVERY_CONFLICT_DATABASE, true);
@@ -609,6 +681,10 @@ StandbyTimeoutHandler(void)
/* forget any pending STANDBY_DEADLOCK_TIMEOUT request */
disable_timeout(STANDBY_DEADLOCK_TIMEOUT, false);
+ /* display wal record information */
+ if (log_recovery_conflicts)
+ LogBlockedWalRecordInfo(-1, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
}
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index c9424f167c..37f1abf802 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -98,6 +98,9 @@ int log_statement = LOGSTMT_NONE;
/* GUC variable for maximum stack depth (measured in kilobytes) */
int max_stack_depth = 100;
+/* GUC variable for standby recovery conflicts information */
+bool log_recovery_conflicts = true;
+
/* wait N seconds to allow attach from a debugger */
int PostAuthDelay = 0;
@@ -126,6 +129,8 @@ char *stack_base_ptr = NULL;
char *register_stack_base_ptr = NULL;
#endif
+#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
+
/*
* Flag to keep track of whether we have started a transaction.
* For extended query protocol this has to be remembered across messages.
@@ -191,6 +196,7 @@ static void drop_unnamed_stmt(void);
static void log_disconnections(int code, Datum arg);
static void enable_statement_timeout(void);
static void disable_statement_timeout(void);
+static void LogBlockingWalRecordApplyInfo(void);
/* ----------------------------------------------------------------
@@ -2886,6 +2892,52 @@ FloatExceptionHandler(SIGNAL_ARGS)
"invalid operation, such as division by zero.")));
}
+/*
+ * Display information about the backend
+ * about to be interrupted due to recovery conflict
+ */
+void
+LogBlockingWalRecordApplyInfo(void)
+{
+
+ PgBackendStatus *beentry;
+ int num_backends = pgstat_fetch_stat_numbackends();
+ int curr_backend;
+ uint32 raw_wait_event;
+ const char *wait_event = NULL;
+ const char *activity_start;
+ const char *backend_state_name;
+ const char *backend_type;
+
+ Assert(MyProc != NULL);
+ /*
+ * Make sure any stats snapshot is thrown away
+ */
+ pgstat_clear_snapshot();
+
+ for (curr_backend = 1; curr_backend <= num_backends; curr_backend++)
+ {
+ beentry = pgstat_fetch_stat_beentry(curr_backend);
+ if (beentry->st_procpid != MyProc->pid)
+ continue;
+
+ backend_state_name = pgstat_get_state_name(beentry->st_state);
+ backend_type = GetBackendTypeDesc(beentry->st_backendType);
+ activity_start = timestamptz_to_str(beentry->st_activity_start_timestamp);
+
+ raw_wait_event = UINT32_ACCESS_ONCE(MyProc->wait_event_info);
+ wait_event = pgstat_get_wait_event(raw_wait_event);
+
+ ereport(LOG,
+ (errmsg("about to be interrupted: backend_type: %s, state: %s, wait_event: %s, query_start: %s",
+ backend_type, backend_state_name, wait_event ? wait_event : "none", activity_start)));
+ }
+ /*
+ * Make sure any stats snapshot is thrown away
+ */
+ pgstat_clear_snapshot();
+}
+
/*
* RecoveryConflictInterrupt: out-of-line portion of recovery conflict
* handling following receipt of SIGUSR1. Designed to be similar to die()
@@ -2986,6 +3038,8 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
(int) reason);
}
+ if (log_recovery_conflicts)
+ LogBlockingWalRecordApplyInfo();
Assert(RecoveryConflictPending && (QueryCancelPending || ProcDiePending));
/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 2f3e0a70e0..7aa55db5af 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1594,7 +1594,15 @@ static struct config_bool ConfigureNamesBool[] =
false,
NULL, NULL, NULL
},
-
+ {
+ {"log_recovery_conflicts", PGC_SUSET, LOGGING_WHAT,
+ gettext_noop("Logs standby recovery conflicts."),
+ NULL
+ },
+ &log_recovery_conflicts,
+ true,
+ NULL, NULL, NULL
+ },
{
{"log_hostname", PGC_SIGHUP, LOGGING_WHAT,
gettext_noop("Logs the host name in the connection logs."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index ac02bd0c00..1009506eff 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -548,6 +548,7 @@
# %% = '%'
# e.g. '<%u%%%d> '
#log_lock_waits = off # log lock waits >= deadlock_timeout
+#log_recovery_conflicts = on # log standby recovery conflicts
#log_parameter_max_length = -1 # when logging statements, limit logged
# bind-parameter values to N bytes;
# -1 means print in full, 0 disables
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index e917dfe92d..eafd4201db 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -100,6 +100,7 @@ typedef enum
extern XLogRecPtr ProcLastRecPtr;
extern XLogRecPtr XactLastRecEnd;
extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd;
+extern XLogReaderState *current_replayed_xlog;
extern bool reachedConsistency;
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index c55dc1481c..2fca769bc9 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -1334,6 +1334,7 @@ extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id);
extern void pgstat_initstats(Relation rel);
+extern const char *pgstat_get_state_name(BackendState backend_state);
extern char *pgstat_clip_activity(const char *raw_activity);
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index a0c0bc3ce5..c57413b06d 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -70,5 +70,6 @@ extern void WaitForProcSignalBarrier(uint64 generation);
extern void ProcessProcSignalBarrier(void);
extern void procsignal_sigusr1_handler(SIGNAL_ARGS);
+extern const char *get_procsignal_reason_desc(ProcSignalReason reason);
#endif /* PROCSIGNAL_H */
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index cfbe426e5a..0a99ad3f40 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -23,6 +23,7 @@
extern int vacuum_defer_cleanup_age;
extern int max_standby_archive_delay;
extern int max_standby_streaming_delay;
+extern bool log_recovery_conflicts;
extern void InitRecoveryTransactionEnvironment(void);
extern void ShutdownRecoveryTransactionEnvironment(void);
diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h
index bd30607b07..e73602ca2e 100644
--- a/src/include/tcop/tcopprot.h
+++ b/src/include/tcop/tcopprot.h
@@ -29,6 +29,7 @@ extern CommandDest whereToSendOutput;
extern PGDLLIMPORT const char *debug_query_string;
extern int max_stack_depth;
extern int PostAuthDelay;
+extern bool log_recovery_conflicts;
/* GUC-configurable parameters */