From 65c2776462f6023108b1586afc9b9b17927a5bd9 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 14 Nov 2025 10:48:35 -0600 Subject: [PATCH v1 3/3] Periodically emit server logs when fewer than 500M transaction IDs remain. --- src/backend/access/transam/multixact.c | 40 +++++++++++++++++++++++--- src/backend/access/transam/varsup.c | 40 +++++++++++++++++++++++--- src/include/access/transam.h | 5 ++-- 3 files changed, 75 insertions(+), 10 deletions(-) diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 67810ea489a..159ae5efb80 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -264,6 +264,7 @@ typedef struct MultiXactStateData /* support for anti-wraparound measures */ MultiXactId multiVacLimit; + MultiXactId multiLogLimit; MultiXactId multiWarnLimit; MultiXactId multiStopLimit; MultiXactId multiWrapLimit; @@ -1048,6 +1049,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) * If we're past multiVacLimit or the safe threshold for member storage * space, or we don't know what the safe threshold for member storage is, * start trying to force autovacuum cycles. + * If we're past multiLogLimit, start issuing logs periodically. * If we're past multiWarnLimit, start issuing warnings. * If we're past multiStopLimit, refuse to create new MultiXactIds. * @@ -1063,6 +1065,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) * possibility of deadlock while doing get_database_name(). First, * copy all the shared values we'll need in this path. 
*/ + MultiXactId multiLogLimit = MultiXactState->multiLogLimit; MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit; MultiXactId multiStopLimit = MultiXactState->multiStopLimit; MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit; @@ -1106,13 +1109,27 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) if (IsUnderPostmaster && (result % 65536) == 0) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - if (!MultiXactIdPrecedes(result, multiWarnLimit)) + if (!MultiXactIdPrecedes(result, multiWarnLimit) || + (!MultiXactIdPrecedes(result, multiLogLimit) && + result % 1000000 == 0)) { char *oldest_datname = get_database_name(oldest_datoid); + int elevel; + + /* + * We only send the periodic warnings to the server log in an + * attempt to avoid confusion from clients (since the WARNING will + * disappear for 1M multis at a time). Once the warning limit is + * reached, we emit a proper WARNING every time. + */ + if (!MultiXactIdPrecedes(result, multiWarnLimit)) + elevel = WARNING; + else + elevel = LOG_SERVER_ONLY; /* complain even if that DB has disappeared */ if (oldest_datname) - ereport(WARNING, + ereport(elevel, (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used", "database \"%s\" must be vacuumed before %u more MultiXactIds are used", multiWrapLimit - result, @@ -1123,7 +1140,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) errhint("Execute a database-wide VACUUM in that database.\n" "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); else - ereport(WARNING, + ereport(elevel, (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used", "database with OID %u must be vacuumed before %u more MultiXactIds are used", multiWrapLimit - result, @@ -2299,6 +2316,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup) { MultiXactId multiVacLimit; + MultiXactId multiLogLimit; 
MultiXactId multiWarnLimit; MultiXactId multiStopLimit; MultiXactId multiWrapLimit; @@ -2340,6 +2358,15 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, if (multiWarnLimit < FirstMultiXactId) multiWarnLimit -= FirstMultiXactId; + /* + * We'll start complaining every 1M multis when we get within 500M multis + * of data loss. The idea is to provide an early warning system that is + * less noisy than multiWarnLimit but provides ample time to react. + */ + multiLogLimit = multiWrapLimit - 500000000; + if (multiLogLimit < FirstMultiXactId) + multiLogLimit -= FirstMultiXactId; + /* * We'll start trying to force autovacuums when oldest_datminmxid gets to * be more than autovacuum_multixact_freeze_max_age mxids old. @@ -2357,6 +2384,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, MultiXactState->oldestMultiXactId = oldest_datminmxid; MultiXactState->oldestMultiXactDB = oldest_datoid; MultiXactState->multiVacLimit = multiVacLimit; + MultiXactState->multiLogLimit = multiLogLimit; MultiXactState->multiWarnLimit = multiWarnLimit; MultiXactState->multiStopLimit = multiStopLimit; MultiXactState->multiWrapLimit = multiWrapLimit; @@ -2394,7 +2422,11 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, needs_offset_vacuum) && IsUnderPostmaster) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - /* Give an immediate warning if past the wrap warn point */ + /* + * Give an immediate warning if past the wrap warn point. We don't bother + * with multiLogLimit here, as it's unlikely to apply. We leave that part + * to GetNewMultiXactId() instead. 
+ */ if (MultiXactIdPrecedes(multiWarnLimit, curMulti)) { char *oldest_datname; diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 5585381bc8c..74ba958eb7a 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -112,6 +112,7 @@ GetNewTransactionId(bool isSubXact) * catastrophic data loss due to XID wraparound. The basic rules are: * * If we're past xidVacLimit, start trying to force autovacuum cycles. + * If we're past xidLogLimit, start issuing logs periodically. * If we're past xidWarnLimit, start issuing warnings. * If we're past xidStopLimit, refuse to execute transactions, unless * we are running in single-user mode (which gives an escape hatch @@ -129,6 +130,7 @@ GetNewTransactionId(bool isSubXact) * possibility of deadlock while doing get_database_name(). First, * copy all the shared values we'll need in this path. */ + TransactionId xidLogLimit = TransamVariables->xidLogLimit; TransactionId xidWarnLimit = TransamVariables->xidWarnLimit; TransactionId xidStopLimit = TransamVariables->xidStopLimit; TransactionId xidWrapLimit = TransamVariables->xidWrapLimit; @@ -165,13 +167,27 @@ GetNewTransactionId(bool isSubXact) errhint("Execute a database-wide VACUUM in that database.\n" "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); } - else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit)) + else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit) || + (TransactionIdFollowsOrEquals(xid, xidLogLimit) && + xid % 1000000 == 0)) { char *oldest_datname = get_database_name(oldest_datoid); + int elevel; + + /* + * We only send the periodic warnings to the server log in an + * attempt to avoid confusion from clients (since the WARNING will + * disappear for 1M transactions at a time). Once the warning + * limit is reached, we emit a proper WARNING every time. 
+ */ + if (TransactionIdFollowsOrEquals(xid, xidWarnLimit)) + elevel = WARNING; + else + elevel = LOG_SERVER_ONLY; /* complain even if that DB has disappeared */ if (oldest_datname) - ereport(WARNING, + ereport(elevel, (errmsg("database \"%s\" must be vacuumed within %u transactions", oldest_datname, xidWrapLimit - xid), @@ -180,7 +196,7 @@ GetNewTransactionId(bool isSubXact) errhint("To avoid transaction ID assignment failures, execute a database-wide VACUUM in that database.\n" "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); else - ereport(WARNING, + ereport(elevel, (errmsg("database with OID %u must be vacuumed within %u transactions", oldest_datoid, xidWrapLimit - xid), @@ -376,6 +392,7 @@ void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) { TransactionId xidVacLimit; + TransactionId xidLogLimit; TransactionId xidWarnLimit; TransactionId xidStopLimit; TransactionId xidWrapLimit; @@ -424,6 +441,16 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) if (xidWarnLimit < FirstNormalTransactionId) xidWarnLimit -= FirstNormalTransactionId; + /* + * We'll start complaining every 1M transactions when we get within 500M + * transactions of data loss. The idea is to provide an early warning + * system that is less noisy than xidWarnLimit but provides ample time to + * react. + */ + xidLogLimit = xidWrapLimit - 500000000; + if (xidLogLimit < FirstNormalTransactionId) + xidLogLimit -= FirstNormalTransactionId; + /* * We'll start trying to force autovacuums when oldest_datfrozenxid gets * to be more than autovacuum_freeze_max_age transactions old. 
@@ -447,6 +474,7 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) LWLockAcquire(XidGenLock, LW_EXCLUSIVE); TransamVariables->oldestXid = oldest_datfrozenxid; TransamVariables->xidVacLimit = xidVacLimit; + TransamVariables->xidLogLimit = xidLogLimit; TransamVariables->xidWarnLimit = xidWarnLimit; TransamVariables->xidStopLimit = xidStopLimit; TransamVariables->xidWrapLimit = xidWrapLimit; @@ -470,7 +498,11 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) IsUnderPostmaster && !InRecovery) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - /* Give an immediate warning if past the wrap warn point */ + /* + * Give an immediate warning if past the wrap warn point. We don't bother + * with xidLogLimit here, as it's unlikely to apply. We leave that part + * to GetNewTransactionId() instead. + */ if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit) && !InRecovery) { char *oldest_datname; diff --git a/src/include/access/transam.h b/src/include/access/transam.h index c9e20418275..a1bd4259f86 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -203,8 +203,8 @@ FullTransactionIdAdvance(FullTransactionId *dest) * LWLocks. * * Note: xidWrapLimit and oldestXidDB are not "active" values, but are - * used just to generate useful messages when xidWarnLimit or xidStopLimit - * are exceeded. + * used just to generate useful messages when xidLogLimit, xidWarnLimit, or + * xidStopLimit are exceeded. */ typedef struct TransamVariablesData { @@ -221,6 +221,7 @@ typedef struct TransamVariablesData TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ TransactionId xidVacLimit; /* start forcing autovacuums here */ + TransactionId xidLogLimit; /* start logging periodically here */ TransactionId xidWarnLimit; /* start complaining here */ TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */ TransactionId xidWrapLimit; /* where the world ends */ -- 2.39.5 (Apple Git-154)