From fde0d4c4745d350ebf4b50e361683de532f2c283 Mon Sep 17 00:00:00 2001 From: Srinath Reddy Sadipiralla Date: Wed, 17 Jun 2026 20:58:43 +0530 Subject: [PATCH 1/1] Fix "missing chunk" errors during heap rewrites by ignoring own xmin. When performing a heap rewrite (CLUSTER, VACUUM FULL, REPACK), the backend must hold an MVCC snapshot to evaluate index expressions against other relations. However, this backend does not use this snapshot to read the historical rows of the relation currently being rewritten. Previously, vacuum_get_cutoffs() used GetOldestNonRemovableTransactionId(), which folds the backend's own xmin into the relation's OldestXmin horizon. If this xmin is held back by a transaction that cannot even see the target relation (e.g., a transaction in another database), the rewrite's horizon becomes artificially conservative. This creates a race condition with lazy vacuum. Lazy vacuum sets the PROC_IN_VACUUM flag, allowing it to exclude its own xmin from the global horizon. Consequently, a concurrent or prior lazy vacuum could calculate a newer horizon and safely remove a recently-dead tuple's TOAST chunks. When the heap rewrite later scans the heap, its artificially older horizon dictates that the recently-dead tuple must be preserved. It attempts to copy the tuple, fails to find the TOAST chunks, and raises a "missing chunk number 0 for toast value" error. This patch fixes the issue by introducing GetOldestNonRemovableTransactionIdForRewrite(). This function calls ComputeXidHorizons() with a new `excludeMyself` boolean parameter. By excluding MyProc from the calculation, the rewrite generates the exact same horizon that a lazy vacuum would use. Also, we skip calling GlobalVisUpdateApply() when excludeMyself is true. This prevents the artificially aggressive local horizon from being cached in the backend's local GlobalVisState. If this fake horizon were cached, it would poison the backend's own pruning logic. 
When the rewrite later accesses other relations to evaluate index expressions, this poisoned cache would cause the backend to physically prune tuples that its own snapshot still strictly requires. --- src/backend/commands/repack.c | 15 ++++++ src/backend/storage/ipc/procarray.c | 80 ++++++++++++++++++++++++++--- src/include/storage/procarray.h | 1 + 3 files changed, 89 insertions(+), 7 deletions(-) diff --git a/src/backend/commands/repack.c b/src/backend/commands/repack.c index ec100e3eef5..93ec67f8aaf 100644 --- a/src/backend/commands/repack.c +++ b/src/backend/commands/repack.c @@ -69,6 +69,7 @@ #include "storage/lmgr.h" #include "storage/predicate.h" #include "storage/proc.h" +#include "storage/procarray.h" #include "utils/acl.h" #include "utils/fmgroids.h" #include "utils/guc.h" @@ -1380,6 +1381,20 @@ copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, memset(&params, 0, sizeof(VacuumParams)); vacuum_get_cutoffs(OldHeap, &params, &cutoffs); + /* + * vacuum_get_cutoffs() folds our own backend's xmin into OldestXmin. For + * a rewrite that is too conservative: the snapshot we hold exists only to + * evaluate index expressions against other relations, not to read + * OldHeap's historical rows. If our xmin is held back by a transaction + * that cannot even see OldHeap (e.g. one in another database), we would + * preserve a recently-dead tuple whose TOAST chunks a concurrent or prior + * lazy vacuum was free to remove, and then fail with "missing chunk" while + * copying it. Recompute OldestXmin ignoring our own backend so it matches + * the horizon lazy vacuum uses. This can only move OldestXmin forward, so + * the freeze cutoffs derived above remain valid. + */ + cutoffs.OldestXmin = GetOldestNonRemovableTransactionIdForRewrite(OldHeap); + /* * FreezeXid will become the table's new relfrozenxid, and that mustn't go * backwards, so take the max. 
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index f540bb6b23f..e2f1ea08e84 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -1671,7 +1671,7 @@ TransactionIdIsInProgress(TransactionId xid) * code doesn't expect (breaking HOT). */ static void -ComputeXidHorizons(ComputeXidHorizonsResult *h) +ComputeXidHorizons(ComputeXidHorizonsResult *h, bool excludeMyself) { ProcArrayStruct *arrayP = procArray; TransactionId kaxmin; @@ -1770,6 +1770,21 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) if (statusFlags & (PROC_IN_VACUUM | PROC_IN_LOGICAL_DECODING)) continue; + /* + * Optionally ignore our own backend's xmin. A heap rewrite (CLUSTER / + * VACUUM FULL / REPACK) holds an MVCC snapshot only so that index + * expressions can be evaluated against other relations; it never + * reads the historical rows of the relation being rewritten through + * that snapshot. Letting our own xmin hold back that relation's + * removal horizon makes the rewrite more conservative than the lazy + * vacuum that may already have removed a recently-dead tuple's TOAST + * chunks, which would then fail with "missing chunk" while copying the + * tuple. This mirrors how PROC_IN_VACUUM excludes a lazy vacuum's own + * xmin. See GetOldestNonRemovableTransactionIdForRewrite(). + */ + if (excludeMyself && proc == MyProc) + continue; + /* shared tables need to take backends in all databases into account */ h->shared_oldest_nonremovable = TransactionIdOlder(h->shared_oldest_nonremovable, xmin); @@ -1898,8 +1913,15 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) TransactionIdPrecedesOrEquals(h->oldest_considered_running, h->slot_catalog_xmin)); - /* update approximate horizons with the computed horizons */ - GlobalVisUpdateApply(h); + /* + * Update approximate horizons with the computed horizons. 
Skip this when + * excluding our own xmin: the resulting horizons are more aggressive than + * is globally safe and are only valid for the caller's own rewrite of a + * single relation, so they must not be cached in this backend's local + * GlobalVisState. + */ + if (!excludeMyself) + GlobalVisUpdateApply(h); } /* @@ -1945,7 +1967,51 @@ GetOldestNonRemovableTransactionId(Relation rel) { ComputeXidHorizonsResult horizons; - ComputeXidHorizons(&horizons); + ComputeXidHorizons(&horizons, false); + + switch (GlobalVisHorizonKindForRel(rel)) + { + case VISHORIZON_SHARED: + return horizons.shared_oldest_nonremovable; + case VISHORIZON_CATALOG: + return horizons.catalog_oldest_nonremovable; + case VISHORIZON_DATA: + return horizons.data_oldest_nonremovable; + case VISHORIZON_TEMP: + return horizons.temp_oldest_nonremovable; + } + + /* just to prevent compiler warnings */ + return InvalidTransactionId; +} + +/* + * GetOldestNonRemovableTransactionIdForRewrite -- variant of + * GetOldestNonRemovableTransactionId() that ignores the calling backend's + * own xmin. + * + * A heap rewrite (CLUSTER / VACUUM FULL / REPACK) must preserve recently-dead + * tuples of the relation being rewritten so that other backends' snapshots + * still see them afterwards. It is not, however, a reader of that relation's + * historical rows itself: the MVCC snapshot it holds exists only to evaluate + * index expressions against *other* relations. Including our own xmin in the + * horizon therefore makes the rewrite needlessly conservative, and -- when our + * xmin is held back by a transaction that cannot even see the relation (for + * example one in another database) -- more conservative than the lazy vacuum + * which already removed a recently-dead tuple's TOAST chunks, leading to + * "missing chunk" errors while copying the tuple. Excluding our own xmin + * yields the same horizon a lazy vacuum would use (PROC_IN_VACUUM does the + * equivalent for lazy vacuum). 
+ * + * The caller must hold a lock on rel strong enough that the set of backends + * that could read its rows cannot change underneath us. + */ +TransactionId +GetOldestNonRemovableTransactionIdForRewrite(Relation rel) +{ + ComputeXidHorizonsResult horizons; + + ComputeXidHorizons(&horizons, true); switch (GlobalVisHorizonKindForRel(rel)) { @@ -1974,7 +2040,7 @@ GetOldestTransactionIdConsideredRunning(void) { ComputeXidHorizonsResult horizons; - ComputeXidHorizons(&horizons); + ComputeXidHorizons(&horizons, false); return horizons.oldest_considered_running; } @@ -1987,7 +2053,7 @@ GetReplicationHorizons(TransactionId *xmin, TransactionId *catalog_xmin) { ComputeXidHorizonsResult horizons; - ComputeXidHorizons(&horizons); + ComputeXidHorizons(&horizons, false); /* * Don't want to use shared_oldest_nonremovable here, as that contains the @@ -4206,7 +4272,7 @@ GlobalVisUpdate(void) ComputeXidHorizonsResult horizons; /* updates the horizons as a side-effect */ - ComputeXidHorizons(&horizons); + ComputeXidHorizons(&horizons, false); } /* diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index d718a5b542f..ccfc31cdb5d 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -51,6 +51,7 @@ extern RunningTransactions GetRunningTransactionData(void); extern bool TransactionIdIsInProgress(TransactionId xid); extern TransactionId GetOldestNonRemovableTransactionId(Relation rel); +extern TransactionId GetOldestNonRemovableTransactionIdForRewrite(Relation rel); extern TransactionId GetOldestTransactionIdConsideredRunning(void); extern TransactionId GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs); -- 2.43.0