From 7ff7631dc90b3c057ccf1093e08462a481db8f9a Mon Sep 17 00:00:00 2001
From: Greg Burd <greg@burd.me>
Date: Wed, 17 Jun 2026 17:35:17 -0400
Subject: [PATCH v48 4/9] Add HOT-indexed updates: selective index maintenance
 and reads

Implement the HOT-indexed (Selective Index Update) feature on the foundation
laid by the executor's modified-attribute identification.

Eligibility: HeapUpdateHotAllowable returns a HeapUpdateIndexMode --
HEAP_UPDATE_ALL_INDEXES (not HOT; every index needs an entry),
HEAP_HEAP_ONLY_UPDATE (classic HOT; no index needs an entry), or
HEAP_SELECTIVE_INDEX_UPDATE (HOT chain, only the changed indexes maintained) --
computed from modified_idx_attrs and the per-relation indexed-attribute set
(RelationGetIndexedAttrs).  An
UPDATE that changes a non-summarizing indexed attribute is
HEAP_SELECTIVE_INDEX_UPDATE unless it is forced to HEAP_UPDATE_ALL_INDEXES by
one of: every indexed attribute changed (nothing to skip), an attribute
referenced by an expression index changed (expression-aware maintenance is not
implemented yet), a system catalog, or the logical-replication apply gate (see
the apply-gating commit).  Partial indexes, exclusion constraints, partitioned
tables, and non-btree access methods are all eligible -- the read path is
access-method agnostic and the predicate column is part of the index's
attribute set, so no carve-out is needed for them.

Write path: the table-AM update contract carries modified attributes IN/OUT as
a Bitmapset (on output the AM adds the whole-row sentinel,
TableTupleUpdateAllIndexes, to signal "every index needs an entry"), and
heap_update, for HEAP_SELECTIVE_INDEX_UPDATE, keeps the new version on the HOT
chain while ExecInsertIndexTuples maintains only the indexes whose attributes
changed.  The new heap-only tuple records, in an inline bitmap in its tail, the
attributes that changed at its hop.  Only the stored tuple carries the bitmap
and the HEAP_INDEXED_UPDATED flag; the caller's in-memory copy is left unmarked
so the flag never promises a trailing bitmap that is not present.

Read path: a chain walk to the live tuple unions the modified-attribute
bitmaps of every hop it crosses.  The index-access layer treats that
crossed-attribute bitmap as the staleness authority: if it overlaps the
arriving index's key columns the entry is stale and is dropped, and the row is
re-supplied by the fresh entry the same update planted.  The read path is
access-method agnostic and needs no value recheck or leaf key: it is correct
even when a key is cycled away and back, because the value-restoring update
planted a fresh entry whose walk crosses no later key-changing hop.

Unique checks are the one place that does compare values: _bt_check_unique
fetches the conflicting tuple under SnapshotDirty and, on a crossed-hop
arrival, compares the live tuple's current key against the arriving leaf with
the index's own ordering procedure (_bt_heap_keys_equal_leaf, BTORDER_PROC
under each column's collation).  Using the opclass comparator -- not a bitwise
image comparison -- distinguishes a stale ancestor leaf from a genuinely live
duplicate (equal under the opclass even if not bitwise-identical) and, in the
in-flight window of a restoring update, routes the stale-ancestor hit into
_bt_doinsert's xwait so the duplicate is still caught.  The comparison reads
plain key columns straight from the heap slot; it never evaluates an indexed
expression, because an UPDATE touching an expression-index attribute is
ineligible for HOT-indexed, so an expression index is never the one receiving
the fresh entry whose insert runs this check.

Co-authored-by: Greg Burd <greg@burd.me>
Co-authored-by: Nathan Bossart <nathandbossart@gmail.com>
---
 src/backend/access/heap/README.HOT           |  34 +
 src/backend/access/heap/README.HOT-INDEXED   | 306 ++++++++
 src/backend/access/heap/heapam.c             | 469 ++++++++---
 src/backend/access/heap/heapam_handler.c     | 189 +++--
 src/backend/access/heap/heapam_indexscan.c   | 173 ++++-
 src/backend/access/index/genam.c             |   3 +
 src/backend/access/index/indexam.c           |  99 ++-
 src/backend/access/nbtree/nbtinsert.c        | 168 +++-
 src/backend/access/nbtree/nbtree.c           |  13 +-
 src/backend/access/table/tableam.c           |  24 +-
 src/backend/catalog/indexing.c               |  67 +-
 src/backend/catalog/toasting.c               |   2 -
 src/backend/commands/repack.c                |  26 +-
 src/backend/executor/execIndexing.c          | 327 ++++----
 src/backend/executor/execReplication.c       |  38 +-
 src/backend/executor/nodeIndexonlyscan.c     |  33 +
 src/backend/executor/nodeIndexscan.c         |  15 +
 src/backend/executor/nodeModifyTable.c       |  73 +-
 src/backend/nodes/makefuncs.c                |   2 -
 src/backend/utils/activity/pgstat_relation.c |  21 +-
 src/backend/utils/cache/relcache.c           | 219 +++++-
 src/include/access/amapi.h                   |   2 +-
 src/include/access/heapam.h                  |  48 +-
 src/include/access/relscan.h                 |  60 ++
 src/include/access/tableam.h                 |  71 +-
 src/include/executor/executor.h              |   5 +-
 src/include/nodes/execnodes.h                |  10 +-
 src/include/pgstat.h                         |  31 +-
 src/include/utils/rel.h                      |  11 +
 src/include/utils/relcache.h                 |  17 +
 src/test/regress/expected/hot_updates.out    | 771 +++++++------------
 src/test/regress/sql/hot_updates.sql         | 603 +++++----------
 src/tools/pgindent/typedefs.list             |   2 +-
 33 files changed, 2557 insertions(+), 1375 deletions(-)

diff --git a/src/backend/access/heap/README.HOT b/src/backend/access/heap/README.HOT
index 74e407f375a..7123656173c 100644
--- a/src/backend/access/heap/README.HOT
+++ b/src/backend/access/heap/README.HOT
@@ -156,6 +156,40 @@ all summarizing indexes.  (Realistically, we only need to propagate the
 update to the indexes that contain the updated values, but that is yet to
 be implemented.)
 
+
+Per-Index Update Tracking
+-------------------------
+
+After the table AM performs the update, the executor determines which
+indexes need new entries using per-index tracking.
+
+The table AM communicates whether a HOT update occurred via the
+update_all_indexes boolean output of table_tuple_update(), together with the
+modified-attrs Bitmapset the caller passed in (attribute numbers offset by
+FirstLowInvalidHeapAttributeNumber).  When update_all_indexes is true the
+update was non-HOT and every index requires a new entry (the tuple has a new
+TID).  When false the update was HOT: the caller compares modified_attrs with
+each index's own attributes to insert entries only into the indexes whose key
+attributes changed (a HOT-indexed update) or only the summarizing indexes (a
+classic HOT update that changed a summarized column), and skips the rest.
+
+The executor then calls ExecSetIndexUnchanged() to populate the per-index
+ii_IndexUnchanged flag on each IndexInfo.  This flag indicates whether each
+index's key values are unchanged by the update.  For non-HOT updates
+the flag is cleared on every index, so each gets a fresh entry at the
+new TID; the flag is never a skip on its own, just a hint to the
+index AM's aminsert for optimizations such as bottom-up deletion of
+logically equivalent duplicate entries.
+
+ExecInsertIndexTuples consults ii_IndexUnchanged to decide whether to
+skip a non-summarizing index during an UPDATE: if the index is marked
+unchanged, the HOT chain root's existing entry still points at the
+tuple, so no new entry is needed.  For non-HOT updates the TID
+changed and ExecSetIndexUnchanged marks every index as changed,
+forcing each to receive a new entry.  Summarizing indexes always get
+the opportunity to update their block-level summaries.
+
+
 Abort Cases
 -----------
 
diff --git a/src/backend/access/heap/README.HOT-INDEXED b/src/backend/access/heap/README.HOT-INDEXED
index 4b701e42586..5d4a2c7d66c 100644
--- a/src/backend/access/heap/README.HOT-INDEXED
+++ b/src/backend/access/heap/README.HOT-INDEXED
@@ -54,3 +54,309 @@ The HOT-indexed invariant (the new contract)
 
 This is what makes dropping a stale entry safe: the live row is always
 reachable through exactly one non-stale entry per index.
+
+
+Eligibility: HeapUpdateHotAllowable
+-----------------------------------
+
+The executor computes modified_idx_attrs (the indexed attributes this UPDATE
+changed, attribute numbers offset by FirstLowInvalidHeapAttributeNumber) and
+passes it to heap_update via table_tuple_update.  HeapUpdateHotAllowable
+classifies the update:
+
+  HEAP_UPDATE_ALL_INDEXES
+        HOT is not permitted; the new tuple goes on a fresh TID and every
+        index gets a new entry.
+  HEAP_HEAP_ONLY_UPDATE
+        no non-summarizing indexed attribute changed, so no index needs a
+        new entry (classic HOT).
+  HEAP_SELECTIVE_INDEX_UPDATE
+        at least one non-summarizing index's attribute changed, but the
+        update may stay on the HOT chain and maintain only the changed
+        indexes selectively.
+
+A non-summarizing indexed attribute changing yields HEAP_SELECTIVE_INDEX_UPDATE
+unless one of these forces HEAP_UPDATE_ALL_INDEXES:
+
+  1. The logical-replication apply path, gated per subscription (see "Logical
+     replication" below).
+  2. An UPDATE touching an attribute referenced by an expression index
+     (selective maintenance of expression indexes is not implemented yet).
+  3. An UPDATE that changes *every* indexed attribute: there is no index to
+     skip, so a plain non-HOT update is cheaper.
+
+System catalogs stay classic-HOT only: a catalog UPDATE that changes a
+non-summarizing indexed attribute falls back to HEAP_UPDATE_ALL_INDEXES, because
+catalog reads go through many paths not all proven safe against stale chain
+entries.  This is the pre-HOT-indexed behaviour for such updates.
+
+INDEX_ATTR_BITMAP_INDEXED (cached in rd_indexedattr) is the set of columns
+referenced by non-summarizing indexes plus, folded in, the columns referenced
+only by summarizing indexes, so that a change to a summarizing-only column is
+seen by the modified-attribute comparison (its index is maintained via the
+classic-HOT summarizing path).  Read-side staleness is filtered by the
+crossed-attribute bitmap, which is access-method agnostic, so a change to a
+column covered by any index is HOT-indexed regardless of the index's access
+method.  Summarizing indexes (e.g. BRIN) keep no per-row leaf that can go
+stale and are maintained via the summarizing path.
+
+
+The write path
+--------------
+
+For HEAP_SELECTIVE_INDEX_UPDATE, heap_update:
+
+  - stores the new tuple as a heap-only tuple on the same page, linked into
+    the chain via t_ctid, exactly like classic HOT; and
+  - sets HEAP_INDEXED_UPDATED (t_infomask2 bit 0x0800) on the new tuple to
+    mark that the chain now carries differing keys.
+
+There is no separate on-page meta-item: that bit and the inline modified-attrs
+bitmap on the heap-only tuple are the entire on-disk footprint.  As for classic
+HOT, a new tuple that does not fit on the page forces a non-HOT (new-page) update.
+
+The inline modified-attrs bitmap is ceil(natts/8) bytes, sized by the tuple's
+OWN attribute count at write time (HeapTupleHeaderGetNatts), not the relation's
+current natts.  ADD COLUMN raises the relation's natts without rewriting
+existing tuples, so one chain can hold hops whose bitmaps were sized for
+different (smaller) natts; every consumer locates and sizes a hop's bitmap
+from that hop's own write-time natts (HotIndexedTupleBitmapNatts in
+access/hot_indexed.h).  A collapse-survivor stub overwrites natts with its 0
+sentinel, so it preserves its write-time natts in the unused block-number half
+of t_ctid (the offset half is the forward link).  Bit positions are attribute
+based and identical across sizes, so a smaller bitmap simply ORs into the low
+bytes of a larger crossed-attribute accumulator.  DROP COLUMN keeps the attnum
+slot (it never renumbers), so existing bitmaps stay aligned.
+
+After the update, table_tuple_update reports update_all_indexes = false (the
+tuple is heap-only).  The executor then maintains indexes selectively:
+ExecSetIndexUnchanged marks each index whose key attributes did not change as
+unchanged, and ExecInsertIndexTuples inserts a fresh entry only into the
+indexes that did change.  Each such entry points at the new tuple's own TID.
+
+
+The chain and the two kinds of leaf entry
+------------------------------------------
+
+After a HOT-indexed update there are, for a changed index, two kinds of leaf
+entry reaching the chain:
+
+  - the pre-update entry for the OLD key, still pointing at an older chain
+    member (now stale once the walk crosses the HOT-indexed hop); and
+  - the fresh entry for the NEW key, pointing at the new heap-only tuple.
+
+Index build and REINDEX index a live HOT-indexed tuple under its OWN TID (not
+the chain root), so the freshly built entry has no hop after it and is never
+treated as stale.
+
+
+Read-side correctness: the crossed-attribute bitmap
+---------------------------------------------------
+
+heap_hot_search_buffer walks the chain from the entry's target to the live
+visible tuple.  Each hop it crosses after the entry's own target -- a live
+HOT-indexed member, a collapse-survivor stub, or a collapsed (redirected)
+prefix -- contributes that hop's inline modified-attrs bitmap to a running
+union, IndexFetchTableData.xs_hot_indexed_crossed, and sets
+*hot_indexed_recheck to flag that the walk crossed at least one such hop.
+
+The index-access layer (index_fetch_heap) tests that union against the
+arriving index's key columns.  Any overlap means a crossed hop changed one of
+this index's inputs, so the entry's stored key no longer matches the live
+tuple: IndexScanDesc.xs_hot_indexed_stale is set, and IndexScan,
+IndexOnlyScan, CLUSTER, and the logical-replication replica-identity lookups
+drop the tuple.  If the union is disjoint from the index's key columns, none
+of the index's inputs changed across the chain, so the entry is current and
+the row is returned.
+
+The union is complete: every crossed live hop and stub contributes its
+bitmap, and chain collapse only ever reclaims a member whose attributes are a
+subset of the surviving later hops (see "Prune and chain collapse"), so a
+reader crossing the survivors still sees every collapsed hop's attributes.
+Disjointness therefore reliably means the entry is current.
+
+This needs no value comparison and no leaf key, so it serves equality, range,
+and inequality scans uniformly, works for any access method whose columns are
+eligible for HOT-indexed updates, and is correct even when a key is cycled
+away and back (X -> Y -> X): the update that restored the value planted a
+fresh entry pointing at its own live tuple, whose walk crosses no later
+key-changing hop, so that entry uniquely returns the row while the stale
+ancestor entry -- whose walk does cross the changing hops -- is dropped.
+
+The read mechanism never reconstructs or compares an index key, so it needs no
+per-access-method support.  (nbtree keeps an internal leaf-key comparison,
+_bt_heap_keys_equal_leaf, used only by _bt_check_unique to tell a stale chain
+entry from a live duplicate during a unique insert; it is not part of the read
+path.)
+
+Unique checks.  _bt_check_unique fetches the conflicting tuple under
+SnapshotDirty and, when the chain walk crossed a HOT-indexed hop, compares the
+live tuple's current key against the arriving leaf with the index's own
+ordering procedure (_bt_heap_keys_equal_leaf, using BTORDER_PROC under each
+column's collation).  This recheck is reached only for an index receiving a
+fresh entry during a HOT-indexed update; HeapUpdateHotAllowable disqualifies
+any UPDATE that touches an expression-index attribute, so the index here never
+has an expression key column (every key column is a plain attribute), and the
+comparison reads attribute values straight from the heap slot -- no expression
+evaluation or executor state is needed.  Using the opclass comparator -- not a
+bitwise image comparison -- means a key
+that was cycled away and back (X -> Y -> X) does not raise a spurious
+duplicate against its own stale leaf, while a genuinely live duplicate (equal
+under the opclass even if not bitwise-identical, e.g. numeric 1.0 vs 1.00) is
+still detected.  (Appendix A motivates this recheck in detail.)
+
+
+Appendices
+----------
+
+Appendix A: Why the unique-check path needs a value comparison at all
+---------------------------------------------------------------------
+
+This is the one place HOT-indexed does compare a key value, even though the
+read path deliberately avoids one.  The rest of this appendix explains why the
+comparison is needed, why it must use the opclass comparator rather than a
+bitwise one, and why the ABA case is what forces the issue.
+
+1. The setup: why a unique insert can even reach a stale leaf
+
+Under classic HOT, an index has exactly one leaf entry per logical row, and
+every leaf entry's key matches the live tuple it chain-resolves to.  So
+_bt_check_unique can trust: "if I find a leaf whose key equals my new key, and
+it resolves to a live tuple, that's a genuine duplicate."
+
+HOT-indexed breaks that one-to-one correspondence.  A HOT-indexed UPDATE that
+changes column a from X to Y:
+ - inserts a fresh leaf entry (Y -> new tuple) into idx_a, and
+ - leaves the old leaf entry (X -> old chain root) in place.
+
+That old (X -> root) entry is now stale: it still chain-resolves to a live
+tuple, but that live tuple's current a is Y, not X.  The read path handles this
+with the crossed-attribute bitmap (no value comparison needed): if the walk
+from the entry's target to the live tuple crosses a hop that changed a, the
+entry is stale and dropped.
+
+When you now INSERT a row with a = X, _bt_check_unique scans idx_a for key X
+and finds that stale (X -> root) leaf.  It must decide: is this a real
+conflict?
+
+2. Why the read-path bitmap is not sufficient here
+
+The read path's logic is: "this entry crossed a hop that changed a => stale =>
+drop it, the fresh entry will supply the row."  For scans that's correct and
+complete, because every live row has exactly one non-stale entry that
+re-supplies it.
+
+But a unique check is asking a different question.  It is not "should I return
+this row?" -- it is "does the live tuple this entry resolves to conflict with
+the key I'm inserting?"  The bitmap can only tell you "an indexed attribute
+changed somewhere on the chain."  It cannot tell you what the live value is
+now, and that is exactly what you need to know to detect a duplicate.
+
+This is the crux of the ABA problem.  Consider:
+
+  INSERT (a=10)                  LP[1] a=10   (root)
+  UPDATE a=11   (HOT-indexed)    LP[2] a=11   bitmap {a}, leaf (11)->LP[2]
+  UPDATE a=10   (HOT-indexed)    LP[3] a=10   bitmap {a}, leaf (10)->LP[3], live
+
+idx_a now has leaves (10)->LP[1] [stale ancestor], (11)->LP[2] [stale], and
+(10)->LP[3] [fresh, live].
+
+Now INSERT (a=10), a genuine duplicate of the live row.  _bt_check_unique scans
+for key 10 and finds the (10)->LP[1] stale ancestor entry.  The chain walk from
+LP[1] to the live tuple LP[3] crosses hops that changed a (10->11, then
+11->10), so the bitmap says "stale."  If the unique check trusted the bitmap
+alone it would skip (10)->LP[1] as stale and miss the real duplicate.  The
+bitmap is fooled because a changed (so the bit is set) even though it changed
+back to the same value: "an attribute changed on the chain" is not "the live
+value differs from this leaf's key."  Under ABA they diverge.
+
+The sharper case is concurrency.  While the restoring UPDATE (a: 11 -> 10) is
+in flight, it has written its new heap tuple but not yet inserted the fresh
+(10)->LP[3] leaf.  A concurrent INSERT (a=10) running its _bt_check_unique scan
+in that window sees only the stale (10)->LP[1] ancestor.  The value recheck
+below makes that hit resolve to xwait on the in-flight updater (via
+_bt_doinsert's wait-and-recheck), so the inserter re-checks after the updater
+commits and finds the conflict.  A bitmap-only verdict would skip the ancestor
+before reaching the xwait logic and admit a duplicate -- which is why the
+recheck is a correctness requirement, not merely an optimization.
+
+3. Why a value comparison fixes it, and why it must be the opclass comparator
+
+So the unique path needs to look at the actual live value, not just "did
+something change."  _bt_check_unique fetches the conflicting tuple under
+SnapshotDirty and, when hi_recheck says a HOT-indexed hop was crossed, calls
+_bt_heap_keys_equal_leaf to compare the live tuple's current key against the
+arriving leaf's stored key:
+
+ - live key equals the leaf key -> genuine duplicate (or an in-flight conflict
+   reached as xwait) -- correct: ABA back to X is a real conflict with a new X.
+ - live key differs -> the leaf is truly stale -> skip it (the fresh entry
+   handles the real row).
+
+Which equality?  Two candidates:
+
+Bitwise/image comparison (datum_image_eq) compares raw bytes.  That is wrong
+for unique checking in the dangerous direction.  Uniqueness in PostgreSQL is
+defined by the index opclass's equality operator, not byte identity, and many
+types have values equal under the opclass but byte-distinct:
+ - numeric: 1.0 and 1.00 are opclass-equal, different on-disk bytes.
+ - float8: -0.0 and +0.0 are equal, different bit patterns.
+ - text/citext under a nondeterministic collation: canonically-equivalent
+   strings that are not byte-identical.
+
+A bitwise comparison would conclude "not equal => stale => skip" for a live
+1.00 versus an inserted 1.0 and miss a genuine violation -- a correctness hole
+as bad as the ABA one.
+
+So _bt_heap_keys_equal_leaf uses the index's own BTORDER_PROC (btree support
+function 1) under each key column's collation, the same machinery _bt_compare
+and _bt_mkscankey use to define equality for the index.  A zero result means
+"equal as the index defines equality," which is precisely the unique-violation
+condition, and the verdict agrees with the index's own notion of uniqueness in
+both directions.
+
+4. Why no expression evaluation is needed
+
+_bt_heap_keys_equal_leaf reads each key column straight from the heap slot
+(slot_getattr) and compares it to the leaf datum; it does not evaluate indexed
+expressions and needs no executor state.  That is sufficient because the
+recheck is only ever reached for an index receiving a fresh entry during a
+HOT-indexed update, and HeapUpdateHotAllowable disqualifies any UPDATE that
+touches an attribute referenced by an expression index
+(INDEX_ATTR_BITMAP_EXPRESSION captures every such attribute).  So a HOT-indexed
+chain never has a crossed hop affecting an expression index, the index reaching
+the recheck never has an expression key column (every indkey is a real
+attribute number), and there is nothing to evaluate.  If selective maintenance
+of expression indexes is implemented in the future, this is where an
+expression-evaluating comparison (e.g. FormIndexDatum) would be reintroduced.
+
+5. Why the asymmetry (bitmap on read, value recheck on unique) is intentional
+
+It looks like two different answers to the same question, but the questions
+differ:
+
+ - Read/scan path: "should this row be returned?"  A stale entry is redundant
+   (the fresh entry supplies the row), so the conservative bitmap verdict is
+   sufficient -- worst case under ABA you drop a redundant entry and the fresh
+   one still returns the row.  No value comparison, so reads stay
+   access-method-agnostic and cheap.
+ - Unique-check path: "is this a conflict?"  A wrong "stale" verdict here does
+   not just drop a redundant entry; it silently admits a duplicate, corrupting
+   the constraint.  It cannot tolerate the bitmap's false "stale" under ABA and
+   must consult the live value (or wait on an in-flight updater) via the
+   opclass comparator.
+
+The bitmap is a filter (a necessary condition: "could be stale"); the opclass
+recheck is the authority (the sufficient condition: "is the live key actually
+different, or is a conflicting update in flight").  The unique path layers the
+authority on top of the filter precisely because its error mode is
+unforgiving.
+
+In one sentence: the unique check compares the live tuple's current key to the
+arriving leaf with the index's own equality (not bytes) because the
+crossed-attribute bitmap can only say "something changed" -- true under an
+X->Y->X cycle even though the value is back to X -- and only an opclass-correct
+value comparison (which also routes an in-flight restoring update to xwait) can
+both recognize the cycled-back value as a genuine duplicate and catch
+duplicates that are opclass-equal but not byte-identical, either of which a
+bitmap or a bitwise comparison would get wrong.
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index dc307eb6dc4..90a5aaa51b3 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -34,6 +34,7 @@
 #include "access/heapam.h"
 #include "access/heaptoast.h"
 #include "access/hio.h"
+#include "access/hot_indexed.h"
 #include "access/multixact.h"
 #include "access/subtrans.h"
 #include "access/syncscan.h"
@@ -44,13 +45,14 @@
 #include "access/xloginsert.h"
 #include "catalog/pg_database.h"
 #include "catalog/pg_database_d.h"
+#include "catalog/pg_subscription.h"
 #include "commands/vacuum.h"
 #include "executor/instrument_node.h"
 #include "executor/tuptable.h"
 #include "nodes/lockoptions.h"
 #include "pgstat.h"
 #include "port/pg_bitutils.h"
-#include "storage/buf.h"
+#include "replication/logicalworker.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
 #include "storage/proc.h"
@@ -78,6 +80,8 @@ static void check_inplace_rel_lock(HeapTuple oldtup);
 #endif
 static Bitmapset *HeapUpdateModifiedIdxAttrs(Relation relation,
 											 HeapTuple oldtup, HeapTuple newtup);
+static HeapTuple heap_form_hot_indexed_tuple(HeapTuple tup, int relnatts,
+											  const Bitmapset *modified_idx_attrs);
 static bool heap_acquire_tuplock(Relation relation, const ItemPointerData *tid,
 								 LockTupleMode mode, LockWaitPolicy wait_policy,
 								 bool *have_tuple_lock);
@@ -2109,9 +2113,25 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 		 * If this is the single and first tuple on page, we can reinit the
 		 * page instead of restoring the whole thing.  Set flag, and hide
 		 * buffer references from XLogInsert.
+		 *
+		 * Also require that the page's tuple area contains nothing other than
+		 * this tuple.  Vacuum's lp_truncate_only second pass
+		 * (PRUNE_VACUUM_CLEANUP) does not call PageRepairFragmentation, so a
+		 * page can legitimately end up with one LP_UNUSED slot at offset 1
+		 * plus orphan tuple bytes left over from the previous lifetime. If
+		 * heap_insert reuses that LP_UNUSED slot, the primary's page keeps the
+		 * orphan bytes while a standby replaying INSERT+INIT zeroes them.
+		 * Emitting INSERT+INIT in that case trips wal_consistency_checking.
+		 * Falling back to a regular INSERT (with the FPI on first touch after
+		 * a checkpoint) keeps replay byte-identical without sacrificing crash
+		 * safety.
+		 *
+		 * NOTE: This must mirror the logic in heap_multi_insert().
 		 */
 		if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
-			PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
+			PageGetMaxOffsetNumber(page) == FirstOffsetNumber &&
+			((PageHeader) page)->pd_upper ==
+			((PageHeader) page)->pd_special - MAXALIGN(heaptup->t_len))
 		{
 			info |= XLOG_HEAP_INIT_PAGE;
 			bufflags |= REGBUF_WILL_INIT;
@@ -3202,9 +3222,11 @@ simple_heap_delete(Relation relation, const ItemPointerData *tid)
  */
 TM_Result
 heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
-			CommandId cid, uint32 options pg_attribute_unused(), Snapshot crosscheck, bool wait,
+			CommandId cid, uint32 options pg_attribute_unused(),
+			Snapshot crosscheck, bool wait,
 			TM_FailureData *tmfd, const LockTupleMode lockmode,
-			const Bitmapset *modified_idx_attrs, const bool hot_allowed)
+			const Bitmapset *modified_idx_attrs,
+			HeapUpdateIndexMode hot_mode)
 {
 	TM_Result	result;
 	TransactionId xid = GetCurrentTransactionId();
@@ -3230,6 +3252,9 @@ heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup,
 	bool		have_tuple_lock = false;
 	bool		iscombo;
 	bool		use_hot_update = false;
+	bool		hot_indexed = false;	/* HOT-indexed update (modified an
+										 * indexed attr but stayed HOT) */
+	Size		hi_bmbytes = 0; /* trailing modified-attrs bitmap size, if any */
 	bool		key_intact;
 	bool		all_visible_cleared = false;
 	bool		all_visible_cleared_new = false;
@@ -3794,6 +3819,38 @@ l2:
 
 	newtupsize = MAXALIGN(newtup->t_len);
 
+	/*
+	 * Keep HOT-indexed (SIU) chains uniform.  HeapUpdateHotAllowable returns
+	 * HEAP_HEAP_ONLY_UPDATE whenever this update modifies no indexed
+	 * attribute.  But if the tuple being updated is already a HOT-indexed
+	 * chain member (it carries HEAP_INDEXED_UPDATED), emitting a classic-HOT
+	 * version would splice a non-HEAP_INDEXED_UPDATED tuple into the chain.
+	 * The prune/collapse machinery forwards only HEAP_INDEXED_UPDATED members
+	 * through bridges, so such a classic-HOT version, once it dies mid
+	 * collapsed-chain, has no handler and trips the "not linked to from any
+	 * HOT chain" error.  Promote to HEAP_SELECTIVE_INDEX_UPDATE instead: with an
+	 * empty modified-attrs set the new version carries HEAP_INDEXED_UPDATED
+	 * and an empty inline-trailing bitmap, inserts into no index (nothing
+	 * changed), and keeps every chain member uniform.  Catalog relations are
+	 * classic-HOT only and never carry HEAP_INDEXED_UPDATED, so this never
+	 * fires for them.
+	 */
+	if (hot_mode == HEAP_HEAP_ONLY_UPDATE &&
+		(oldtup.t_data->t_infomask2 & HEAP_INDEXED_UPDATED) != 0)
+		hot_mode = HEAP_SELECTIVE_INDEX_UPDATE;
+
+	/*
+	 * A HOT-indexed update appends a fixed-size inline-trailing
+	 * modified-attrs bitmap to the new tuple (see access/hot_indexed.h).
+	 * Reserve room for it in the page-fit calculation now, while we still
+	 * might take the same-page HOT path; if the update later drops to non-HOT
+	 * (the tuple does not fit on the page) it is stored without the bitmap and
+	 * the reservation is simply conservative.
+	 */
+	if (hot_mode == HEAP_SELECTIVE_INDEX_UPDATE)
+		hi_bmbytes = HotIndexedBitmapBytes(RelationGetNumberOfAttributes(relation));
+	newtupsize = MAXALIGN(newtup->t_len + hi_bmbytes);
+
 	if (need_toast || newtupsize > pagefree)
 	{
 		TransactionId xmax_lock_old_tuple;
@@ -3891,7 +3948,7 @@ l2:
 		{
 			/* Note we always use WAL and FSM during updates */
 			heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0);
-			newtupsize = MAXALIGN(heaptup->t_len);
+			newtupsize = MAXALIGN(heaptup->t_len + hi_bmbytes);
 		}
 		else
 			heaptup = newtup;
@@ -3992,10 +4049,10 @@ l2:
 	{
 		/*
 		 * Since the new tuple is going into the same page, we might be able
-		 * to do a HOT update.  Check if any of the index columns have been
-		 * changed.
+		 * to do a HOT update.  Check if HeapUpdateHotAllowable() has
+		 * sanctioned it (HEAP_HEAP_ONLY_UPDATE or HEAP_SELECTIVE_INDEX_UPDATE).
 		 */
-		if (hot_allowed)
+		if (hot_mode != HEAP_UPDATE_ALL_INDEXES)
 			use_hot_update = true;
 	}
 	else
@@ -4004,6 +4061,27 @@ l2:
 		PageSetFull(page);
 	}
 
+	/*
+	 * For a same-page HOT-indexed update, replace heaptup with a copy that
+	 * carries the inline-trailing modified-attrs bitmap (and
+	 * HEAP_INDEXED_UPDATED).  Done here, outside the critical section,
+	 * because it allocates; the bitmap's size was reserved in newtupsize
+	 * above.  Only the stored tuple (heaptup) gets the bitmap and the flag;
+	 * the caller's newtup must NOT be marked HEAP_INDEXED_UPDATED, because it
+	 * has no trailing bitmap -- see the flag handling below.
+	 */
+	if (use_hot_update && hot_mode == HEAP_SELECTIVE_INDEX_UPDATE)
+	{
+		HeapTuple	ext;
+
+		ext = heap_form_hot_indexed_tuple(heaptup,
+										  RelationGetNumberOfAttributes(relation),
+										  modified_idx_attrs);
+		if (heaptup != newtup)
+			heap_freetuple(heaptup);
+		heaptup = ext;
+	}
+
 	/*
 	 * Compute replica identity tuple before entering the critical section so
 	 * we don't PANIC upon a memory allocation failure.
@@ -4040,6 +4118,29 @@ l2:
 		HeapTupleSetHeapOnly(heaptup);
 		/* Mark the caller's copy too, in case different from heaptup */
 		HeapTupleSetHeapOnly(newtup);
+
+		/*
+		 * For a HOT-indexed update, the new live tuple carries
+		 * HEAP_INDEXED_UPDATED so index scans walking the chain know it is a
+		 * HOT-indexed hop carrying an inline-trailing modified-attrs bitmap.
+		 *
+		 * Set the flag only on heaptup, the version actually stored on the
+		 * page: heaptup carries the trailing bitmap, so the flag's promise (a
+		 * bitmap occupies the final HotIndexedBitmapBytes(natts) bytes of the
+		 * item) holds.  The caller's newtup is a separate in-memory tuple
+		 * whose t_len does not include the bitmap; marking it
+		 * HEAP_INDEXED_UPDATED would assert a trailing bitmap that is not
+		 * there, so any later reader using ItemIdGetLength()-relative access
+		 * would misread attribute data as the bitmap.  We therefore leave
+		 * newtup's flag clear.  Nothing reads the modified-attrs bitmap off an
+		 * in-memory tuple; every consumer reads it from the page via the line
+		 * pointer's length.
+		 */
+		if (hot_mode == HEAP_SELECTIVE_INDEX_UPDATE)
+		{
+			heaptup->t_data->t_infomask2 |= HEAP_INDEXED_UPDATED;
+			hot_indexed = true;
+		}
 	}
 	else
 	{
@@ -4144,7 +4245,8 @@ l2:
 	if (have_tuple_lock)
 		UnlockTupleTuplock(relation, &(oldtup.t_self), lockmode);
 
-	pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
+	pgstat_count_heap_update(relation, use_hot_update, hot_indexed,
+							 newbuf != buffer);
 
 	/*
 	 * If heaptup is a private copy, release it.  Don't forget to copy t_self
@@ -4290,7 +4392,7 @@ check_inplace_rel_lock(HeapTuple oldtup)
 
 /*
  * Check if the specified attribute's values are the same.  Subroutine for
- * HeapDetermineColumnsInfo.
+ * HeapUpdateModifiedIdxAttrs.
  */
 static bool
 heap_attr_equals(TupleDesc tupdesc, int attrnum, Datum value1, Datum value2,
@@ -4334,63 +4436,103 @@ heap_attr_equals(TupleDesc tupdesc, int attrnum, Datum value1, Datum value2,
 }
 
 /*
- * HOT updates are possible when either: a) there are no modified indexed
- * attributes, or b) the modified attributes are all on summarizing indexes.
- * Later, in heap_update(), we can choose to perform a HOT update if there is
- * space on the page for the new tuple and the following code has determined
- * that HOT is allowed.
+ * HeapUpdateHotAllowable --
+ *
+ * Classify an UPDATE for HOT eligibility from the set of indexed attributes
+ * it changed (modified_idx_attrs, computed by the executor):
+ *
+ *   HEAP_UPDATE_ALL_INDEXES   HOT is not permitted; the new tuple goes on a
+ *                             fresh TID and every index gets a new entry.
+ *   HEAP_HEAP_ONLY_UPDATE     Classic HOT: no non-summarizing indexed
+ *                             attribute changed, so no index needs a new
+ *                             entry and the new tuple joins the chain via a
+ *                             t_ctid forward link.
+ *   HEAP_SELECTIVE_INDEX_UPDATE HOT with selective index update: at least one
+ *                             non-summarizing index's attribute changed, but
+ *                             the new tuple can still join the HOT chain on
+ *                             the same page; only the indexes whose
+ *                             attributes changed receive a new entry.
+ *
+ * This routine only classifies the update; heap_update() performs it and may
+ * still fall back to a non-HOT update when the new tuple does not fit on the
+ * page, exactly as for classic HOT.
  */
-bool
-HeapUpdateHotAllowable(Relation relation, const Bitmapset *modified_idx_attrs,
-					   bool *summarized_only)
+HeapUpdateIndexMode
+HeapUpdateHotAllowable(Relation relation, const Bitmapset *modified_idx_attrs)
 {
-	bool		hot_allowed;
+	const Bitmapset *all_idx_attrs;
 
 	/*
-	 * Let's be optimistic and start off by assuming the best case, no indexes
-	 * need updating and HOT is allowable.
+	 * Case (a): no indexed attribute was modified -> classic HOT.
 	 */
-	hot_allowed = true;
-	*summarized_only = false;
+	if (bms_is_empty(modified_idx_attrs))
+		return HEAP_HEAP_ONLY_UPDATE;
 
 	/*
-	 * Check for case (a); when there are no modified index attributes HOT is
-	 * allowed.
+	 * Case (b): at least one indexed attribute changed.  If all of them are
+	 * used only by summarizing indexes, we can still take the classic HOT
+	 * path -- the summarizing index AM gets a new entry via aminsert and no
+	 * non-summarizing index needs to change.
 	 */
-	if (bms_is_empty(modified_idx_attrs))
-		hot_allowed = true;
-	else
-	{
-		Bitmapset  *sum_attrs = RelationGetIndexAttrBitmap(relation,
-														   INDEX_ATTR_BITMAP_SUMMARIZED);
+	if (bms_is_subset(modified_idx_attrs, RelationGetIndexAttrBitmapNoCopy(relation,
+												INDEX_ATTR_BITMAP_SUMMARIZED)))
+		return HEAP_HEAP_ONLY_UPDATE;
 
-		/*
-		 * At least one index attribute was modified, but is this case (b)
-		 * where all the modified index attributes are only used by
-		 * summarizing indexes?  If it is, then we need to update those
-		 * indexes, but this update can still be considered heap-only (HOT)
-		 * and avoid updating any non-summarizing indexes on the relation.
-		 */
-		if (bms_is_subset(modified_idx_attrs, sum_attrs))
-		{
-			hot_allowed = true;
-			*summarized_only = true;
-		}
-		else
-		{
-			/*
-			 * Now we know a) one or more indexed attributes were modified
-			 * (changed value, not just referenced within the UPDATE) and that
-			 * b) at least one of those attributes is used by a
-			 * non-summarizing index. HOT is not allowed.
-			 */
-			hot_allowed = false;
-		}
+	/*
+	 * A non-summarizing indexed attribute changed.  HOT-indexed is supported
+	 * whenever the relation can tolerate extra index entries in a chain whose
+	 * per-chain-member keys may differ.  The logical-replication apply path
+	 * is gated elsewhere by hot_indexed_on_apply, and system catalogs are
+	 * excluded below.  The remaining HEAP_UPDATE_ALL_INDEXES fallbacks are:
+	 *
+	 * - An UPDATE that modifies an attribute referenced by an expression
+	 * index.  Selective maintenance of an expression index requires
+	 * evaluating the indexed expression to decide whether its value (hence
+	 * its entry) changed; that expression-aware path is not implemented yet,
+	 * so such an update falls back to non-HOT.  Updates that do not touch any
+	 * expression-index attribute stay eligible.
+	 *
+	 * - An UPDATE that modifies every indexed attribute of the relation.
+	 * HOT-indexed only pays off when it can skip maintaining at least one
+	 * index whose key did not change; if all indexed attributes changed there
+	 * is nothing to skip, so a plain non-HOT update is cheaper (it avoids the
+	 * chain-walk and bitmap-overlap overhead).
+	 */
+	all_idx_attrs = RelationGetIndexAttrBitmapNoCopy(relation,
+											   INDEX_ATTR_BITMAP_INDEXED);
 
-		bms_free(sum_attrs);
-	}
+	/*
+	 * System catalogs keep classic HOT (an UPDATE touching no non-summarizing
+	 * indexed attribute already returned HEAP_HEAP_ONLY_UPDATE above), but do
+	 * NOT take the HOT-indexed path: catalog reads go through many code paths
+	 * (systable index scans, SnapshotDirty unique checks, seqscans in
+	 * orderings the chain-walk dedup does not cover) that are not all proven
+	 * safe against stale chain entries.  Falling back to a non-HOT update
+	 * here is exactly the pre-HOT-indexed behaviour for such catalog updates.
+	 */
+	if (IsCatalogRelation(relation))
+		return HEAP_UPDATE_ALL_INDEXES;
 
-	return hot_allowed;
+	/*
+	 * Disqualify when the update touches an attribute referenced by an
+	 * expression index (the first fallback listed above).  Updates that leave
+	 * every expression-index attribute unchanged remain eligible.
+	 */
+	if (bms_overlap(modified_idx_attrs,
+					RelationGetIndexAttrBitmapNoCopy(relation,
+													 INDEX_ATTR_BITMAP_EXPRESSION)))
+		return HEAP_UPDATE_ALL_INDEXES;
+
+	/*
+	 * If every indexed attribute changed, a HOT-selective update could not
+	 * skip any index -- each index needs a fresh entry anyway -- so it would
+	 * pay the HOT/SIU chain-walk and bitmap-overlap overhead for no saved
+	 * index maintenance.  Fall back to a plain non-HOT update in that case.
+	 */
+	if (bms_is_subset(all_idx_attrs, modified_idx_attrs))
+		return HEAP_UPDATE_ALL_INDEXES;
+
+	return HEAP_SELECTIVE_INDEX_UPDATE;
 }
 
 /*
@@ -4402,15 +4544,33 @@ LockTupleMode
 HeapUpdateDetermineLockmode(Relation relation, const Bitmapset *modified_idx_attrs)
 {
 	LockTupleMode lockmode = LockTupleExclusive;
+	const Bitmapset *key_attrs;
+
+	/*
+	 * Common fast path: when no indexed attribute changed (e.g. pgbench-style
+	 * "UPDATE t SET non_idx_col = ..." or the wide_0 "UPDATE t SET id = id"
+	 * workload after the executor's fast path in ExecUpdateModifiedIdxAttrs),
+	 * modified_idx_attrs is empty and a key column cannot have changed.  Skip
+	 * the relcache lookup and return the weaker lock immediately.  At high
+	 * TPS this avoids a per-UPDATE RelationGetIndexAttrBitmapNoCopy lookup
+	 * of the KEY bitmap.
+	 */
+	if (bms_is_empty(modified_idx_attrs))
+		return LockTupleNoKeyExclusive;
 
-	Bitmapset  *key_attrs = RelationGetIndexAttrBitmap(relation,
-													   INDEX_ATTR_BITMAP_KEY);
+	/*
+	 * Borrow the cached bitmap rather than copying it; we only test overlap
+	 * and never mutate or free key_attrs.  HeapUpdateDetermineLockmode runs
+	 * without buffer locks but the relcache entry is pinned by the caller's
+	 * lock on the relation, and we touch nothing between fetch and the
+	 * bms_overlap that could trigger a relcache invalidation.
+	 */
+	key_attrs = RelationGetIndexAttrBitmapNoCopy(relation,
+												 INDEX_ATTR_BITMAP_KEY);
 
 	if (!bms_overlap(modified_idx_attrs, key_attrs))
 		lockmode = LockTupleNoKeyExclusive;
 
-	bms_free(key_attrs);
-
 	return lockmode;
 }
 
@@ -4495,6 +4655,71 @@ HeapUpdateModifiedIdxAttrs(Relation relation, HeapTuple oldtup, HeapTuple newtup
 	return modified_idx_attrs;
 }
 
+/*
+ * heap_form_hot_indexed_tuple
+ *
+ * Return a newly palloc'd copy of tup that carries the fixed-size
+ * inline-trailing modified-attributes bitmap (see access/hot_indexed.h),
+ * with HEAP_INDEXED_UPDATED set.  The bitmap records the user attributes in
+ * modified_idx_attrs (the indexed attributes this UPDATE changed, using the
+ * FirstLowInvalidHeapAttributeNumber offset convention); an empty set yields
+ * an all-zero bitmap, which is correct for the chain-uniformity promotion of
+ * a classic-HOT update on an already-HOT-indexed chain.
+ *
+ * The bitmap occupies the final HotIndexedBitmapBytes(natts) bytes of the
+ * tuple, where natts is the tuple's own attribute count
+ * (HeapTupleHeaderGetNatts) -- which a reader recovers from the stored tuple,
+ * so the bitmap stays locatable even after the relation's natts later grows
+ * via ADD COLUMN.  For a freshly formed UPDATE tuple this equals the
+ * relation's current natts; we assert that to catch any future divergence.
+ * The bitmap sits past the attribute data, so heap_deform_tuple never sees
+ * it.  The caller must have reserved room for the extra bytes in the page-fit
+ * calculation, and must free the returned tuple.
+ */
+static HeapTuple
+heap_form_hot_indexed_tuple(HeapTuple tup, int relnatts,
+							const Bitmapset *modified_idx_attrs)
+{
+	int			natts = HeapTupleHeaderGetNatts(tup->t_data);
+	Size		bmbytes;
+	Size		newlen;
+	HeapTuple	newtuple;
+	uint8	   *bitmap;
+	int			x = -1;
+
+	/*
+	 * The bitmap is sized and located by the tuple's own natts; a freshly
+	 * formed UPDATE tuple carries the full relation natts.  If these ever
+	 * diverge the page-fit reservation (made with relnatts) and the actual
+	 * bitmap size would disagree.
+	 */
+	Assert(natts == relnatts);
+	bmbytes = HotIndexedBitmapBytes(natts);
+	newlen = tup->t_len + bmbytes;
+
+	newtuple = (HeapTuple) palloc0(HEAPTUPLESIZE + newlen);
+	newtuple->t_len = newlen;
+	newtuple->t_self = tup->t_self;
+	newtuple->t_tableOid = tup->t_tableOid;
+	newtuple->t_data = (HeapTupleHeader) ((char *) newtuple + HEAPTUPLESIZE);
+
+	/* copy the original tuple; the trailing bitmap bytes stay zero */
+	memcpy(newtuple->t_data, tup->t_data, tup->t_len);
+	newtuple->t_data->t_infomask2 |= HEAP_INDEXED_UPDATED;
+
+	bitmap = HotIndexedGetModifiedBitmapRW(newtuple->t_data, newlen, natts);
+	while ((x = bms_next_member(modified_idx_attrs, x)) >= 0)
+	{
+		AttrNumber	attnum = x + FirstLowInvalidHeapAttributeNumber;
+
+		/* only user attributes can be modified-and-indexed */
+		if (attnum >= 1)
+			HotIndexedSetAttrModified(bitmap, attnum);
+	}
+
+	return newtuple;
+}
+
 /*
  *	simple_heap_update - replace a tuple
  *
@@ -4505,7 +4730,7 @@ HeapUpdateModifiedIdxAttrs(Relation relation, HeapTuple oldtup, HeapTuple newtup
  */
 void
 simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup,
-				   TU_UpdateIndexes *update_indexes)
+				   bool *update_all_indexes)
 {
 	TM_Result	result;
 	TM_FailureData tmfd;
@@ -4514,14 +4739,15 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
 	BufferHeapTupleTableSlot *bslot;
 	HeapTuple	oldtup;
 	bool		shouldFree = true;
-	Bitmapset  *idx_attrs,
-			   *modified_idx_attrs;
-	bool		hot_allowed,
-				summarized_only;
+	Bitmapset  *idx_attrs;
+	Bitmapset  *local_modified_idx_attrs;
+	HeapUpdateIndexMode hot_mode;
 	Buffer		buffer;
 
 	Assert(ItemPointerIsValid(otid));
 
+	*update_all_indexes = false;
+
 	/*
 	 * Fetch this bitmap of interesting attributes from relcache before
 	 * obtaining a buffer lock because if we are doing an update on one of the
@@ -4571,8 +4797,6 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
 		 */
 		Assert(RelationSupportsSysCache(RelationGetRelid(relation)));
 
-		*update_indexes = TU_None;
-
 		/* modified_idx_attrs not yet initialized */
 		bms_free(idx_attrs);
 		ExecDropSingleTupleTableSlot(slot);
@@ -4588,13 +4812,14 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
 	ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
 	oldtup = ExecFetchSlotHeapTuple(slot, false, &shouldFree);
 
-	modified_idx_attrs = HeapUpdateModifiedIdxAttrs(relation, oldtup, tup);
-	lockmode = HeapUpdateDetermineLockmode(relation, modified_idx_attrs);
-	hot_allowed = HeapUpdateHotAllowable(relation, modified_idx_attrs, &summarized_only);
+	local_modified_idx_attrs = HeapUpdateModifiedIdxAttrs(relation, oldtup, tup);
+	lockmode = HeapUpdateDetermineLockmode(relation, local_modified_idx_attrs);
+	hot_mode = HeapUpdateHotAllowable(relation, local_modified_idx_attrs);
 
-	result = heap_update(relation, otid, tup, GetCurrentCommandId(true), 0,
+	result = heap_update(relation, otid, tup, GetCurrentCommandId(true),
+						 0 /* options */ ,
 						 InvalidSnapshot, true /* wait for commit */ ,
-						 &tmfd, lockmode, modified_idx_attrs, hot_allowed);
+						 &tmfd, lockmode, local_modified_idx_attrs, hot_mode);
 
 	if (shouldFree)
 		heap_freetuple(oldtup);
@@ -4602,14 +4827,6 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
 	ExecDropSingleTupleTableSlot(slot);
 	bms_free(idx_attrs);
 
-	/*
-	 * Decide whether new index entries are needed for the tuple
-	 *
-	 * If the update is not HOT, we must update all indexes. If the update is
-	 * HOT, it could be that we updated summarized columns, so we either
-	 * update only summarized indexes, or none at all.
-	 */
-	*update_indexes = TU_None;
 	switch (result)
 	{
 		case TM_SelfModified:
@@ -4618,11 +4835,14 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
 			break;
 
 		case TM_Ok:
-			/* done successfully */
-			if (!HeapTupleIsHeapOnly(tup))
-				*update_indexes = TU_All;
-			else if (summarized_only)
-				*update_indexes = TU_Summarizing;
+
+			/*
+			 * If the tuple stored by heap_update is heap-only this was a HOT
+			 * update and (subject to per-index checks) not every index needs
+			 * a new entry; otherwise every index must get one pointing at the
+			 * new tuple's TID.
+			 */
+			*update_all_indexes = !HeapTupleIsHeapOnly(tup);
 			break;
 
 		case TM_Updated:
@@ -4637,6 +4857,8 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup
 			elog(ERROR, "unrecognized heap_update status: %u", result);
 			break;
 	}
+
+	bms_free(local_modified_idx_attrs);
 }
 
 
@@ -8201,39 +8423,52 @@ index_delete_check_htid(TM_IndexDeleteOp *delstate,
 	Assert(OffsetNumberIsValid(istatus->idxoffnum));
 
 	if (unlikely(indexpagehoffnum > maxoff))
-		ereport(ERROR,
-				(errcode(ERRCODE_INDEX_CORRUPTED),
-				 errmsg_internal("heap tid from index tuple (%u,%u) points past end of heap page line pointer array at offset %u of block %u in index \"%s\"",
-								 ItemPointerGetBlockNumber(htid),
-								 indexpagehoffnum,
-								 istatus->idxoffnum, delstate->iblknum,
-								 RelationGetRelationName(delstate->irel))));
+	{
+		/*
+		 * Under HOT-indexed updates, a stale btree entry can outlive heap
+		 * pruning/vacuum of the page it targets; if the target offset is past
+		 * the current max, treat as vacuumable instead of raising an
+		 * index-corruption error.
+		 */
+		return;
+	}
 
 	iid = PageGetItemId(page, indexpagehoffnum);
 	if (unlikely(!ItemIdIsUsed(iid)))
-		ereport(ERROR,
-				(errcode(ERRCODE_INDEX_CORRUPTED),
-				 errmsg_internal("heap tid from index tuple (%u,%u) points to unused heap page item at offset %u of block %u in index \"%s\"",
-								 ItemPointerGetBlockNumber(htid),
-								 indexpagehoffnum,
-								 istatus->idxoffnum, delstate->iblknum,
-								 RelationGetRelationName(delstate->irel))));
+	{
+		/*
+		 * Under HOT-indexed updates, a stale btree entry can legitimately
+		 * point at an LP that has since been reclaimed to LP_UNUSED by
+		 * pruning before VACUUM processed the index.  Treat that as "the
+		 * chain is vacuumable" (caller's downstream chain walk will reach the
+		 * same conclusion) rather than an index-corruption error.
+		 */
+		return;
+	}
 
-	if (ItemIdHasStorage(iid))
+	/*
+	 * A redirect line pointer (LP_REDIRECT) is a valid chain root: an index
+	 * entry pointing at it is legitimate and the caller's chain walk decides
+	 * deletability.  Only LP_NORMAL items (real tuples) are inspected below.
+	 */
+	if (ItemIdIsNormal(iid))
 	{
 		HeapTupleHeader htup;
 
-		Assert(ItemIdIsNormal(iid));
 		htup = (HeapTupleHeader) PageGetItem(page, iid);
 
 		if (unlikely(HeapTupleHeaderIsHeapOnly(htup)))
-			ereport(ERROR,
-					(errcode(ERRCODE_INDEX_CORRUPTED),
-					 errmsg_internal("heap tid from index tuple (%u,%u) points to heap-only tuple at offset %u of block %u in index \"%s\"",
-									 ItemPointerGetBlockNumber(htid),
-									 indexpagehoffnum,
-									 istatus->idxoffnum, delstate->iblknum,
-									 RelationGetRelationName(delstate->irel))));
+		{
+			/*
+			 * A HOT-indexed update plants a fresh index entry that points
+			 * directly at a heap-only tuple; those tuples carry
+			 * HEAP_INDEXED_UPDATED.  A stale btree entry can also arrive at a
+			 * heap-only tuple when a chain root got pruned out.  Both are
+			 * legal under HOT-indexed; exempt them from the "index entries
+			 * must target chain roots" invariant and let the caller's chain
+			 * walk decide whether the entry is deletable.
+			 */
+		}
 	}
 }
 
@@ -8448,7 +8683,7 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
 
 			/* Are any tuples from this HOT chain non-vacuumable? */
 			if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
-									   &heapTuple, NULL, true))
+									   &heapTuple, NULL, true, NULL, NULL, NULL))
 				continue;		/* can't delete entry */
 
 			/* Caller will delete, since whole HOT chain is vacuumable */
@@ -9030,9 +9265,20 @@ log_heap_update(Relation reln, Buffer oldbuf,
 		}
 	}
 
-	/* If new tuple is the single and first tuple on page... */
+	/*
+	 * If new tuple is the single and first tuple on page, replay can reinit
+	 * the page from scratch.
+	 *
+	 * Also require that the page's tuple area contains nothing other than this
+	 * tuple.  See heap_insert for why this matters when vacuum has left orphan
+	 * tuple bytes behind an LP_UNUSED slot.
+	 *
+	 * NOTE: this must mirror the same logic in heap_insert()
+	 */
 	if (ItemPointerGetOffsetNumber(&(newtup->t_self)) == FirstOffsetNumber &&
-		PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
+		PageGetMaxOffsetNumber(page) == FirstOffsetNumber &&
+		((PageHeader) page)->pd_upper ==
+		((PageHeader) page)->pd_special - MAXALIGN(newtup->t_len))
 	{
 		info |= XLOG_HEAP_INIT_PAGE;
 		init = true;
@@ -9094,6 +9340,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
 	 * The 'data' doesn't include the common prefix or suffix.
 	 */
 	XLogRegisterBufData(0, &xlhdr, SizeOfHeapHeader);
+
 	if (prefixlen == 0)
 	{
 		XLogRegisterBufData(0,
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index e6cb8197dec..5f65c4e8f8c 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -224,48 +224,37 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
 					CommandId cid, uint32 options,
 					Snapshot snapshot, Snapshot crosscheck,
 					bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
-					const Bitmapset *modified_idx_attrs, TU_UpdateIndexes *update_indexes)
+					Bitmapset **modified_attrs)
 {
 	bool		shouldFree = true;
 	HeapTuple	tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
-	bool		hot_allowed;
-	bool		summarized_only;
+	HeapUpdateIndexMode hot_mode;
 	TM_Result	result;
 
 	Assert(ItemPointerIsValid(otid));
 
-	hot_allowed = HeapUpdateHotAllowable(relation, modified_idx_attrs, &summarized_only);
-	*lockmode = HeapUpdateDetermineLockmode(relation, modified_idx_attrs);
+	hot_mode = HeapUpdateHotAllowable(relation, *modified_attrs);
+	*lockmode = HeapUpdateDetermineLockmode(relation, *modified_attrs);
 
 	/* Update the tuple with table oid */
 	slot->tts_tableOid = RelationGetRelid(relation);
 	tuple->t_tableOid = slot->tts_tableOid;
 
-	result = heap_update(relation, otid, tuple, cid, options, crosscheck, wait,
-						 tmfd, *lockmode, modified_idx_attrs, hot_allowed);
+	result = heap_update(relation, otid, tuple, cid, options,
+						 crosscheck, wait,
+						 tmfd, *lockmode, *modified_attrs, hot_mode);
 	ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
 
 	/*
-	 * Decide whether new index entries are needed for the tuple
-	 *
-	 * Note: heap_update returns the tid (location) of the new tuple in the
-	 * t_self field.
-	 *
-	 * If the update is not HOT, we must update all indexes. If the update is
-	 * HOT, it could be that we updated summarized columns, so we either
-	 * update only summarized indexes, or none at all.
+	 * Tell the caller whether every index needs a new entry.  If the new
+	 * tuple is not heap-only the update was not HOT: it is an independent
+	 * version requiring a fresh entry in every index, which we signal by
+	 * adding the whole-row attribute to *modified_attrs.  Otherwise (classic
+	 * HOT or HOT-indexed) the caller consults the per-index attributes.
 	 */
-	*update_indexes = TU_None;
-	if (result == TM_Ok)
-	{
-		if (HeapTupleIsHeapOnly(tuple))
-		{
-			if (summarized_only)
-				*update_indexes = TU_Summarizing;
-		}
-		else
-			*update_indexes = TU_All;
-	}
+	if (result == TM_Ok && !HeapTupleIsHeapOnly(tuple))
+		*modified_attrs = bms_add_member(*modified_attrs,
+										 TableTupleUpdateAllIndexes);
 
 	if (shouldFree)
 		pfree(tuple);
@@ -731,9 +720,33 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 			if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
 				break;
 
-			/* Since we used no scan keys, should never need to recheck */
+			/*
+			 * CLUSTER uses a no-key full-index scan; it cannot do any
+			 * tuple-level filtering itself.  The HOT-indexed reader path
+			 * routinely sets xs_recheck when walking chain entries whose
+			 * index key may be stale relative to the visible heap tuple.
+			 * Those entries cause the same live tuple to be visited via the
+			 * fresh hot-indexed-inserted entry too; including them would
+			 * duplicate rows in the rewritten heap.  Skip them here -- the
+			 * tuple is reachable through its canonical index entry.
+			 *
+			 * If xs_recheck is set with actual scan keys, that's a real lossy
+			 * index scenario CLUSTER can't handle (historical restriction).
+			 */
 			if (indexScan->xs_recheck)
-				elog(ERROR, "CLUSTER does not support lossy index conditions");
+			{
+				if (indexScan->numberOfKeys > 0)
+					elog(ERROR, "CLUSTER does not support lossy index conditions");
+				continue;
+			}
+
+			/*
+			 * Same reasoning as for xs_recheck: a HOT-indexed stale entry
+			 * would re-emit a tuple that the scan also returns via its
+			 * canonical fresh entry.  Skip.
+			 */
+			if (indexScan->xs_hot_indexed_stale)
+				continue;
 		}
 		else
 		{
@@ -1647,30 +1660,48 @@ heapam_index_build_range_scan(Relation heapRelation,
 
 			offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
 
-			/*
-			 * If a HOT tuple points to a root that we don't know about,
-			 * obtain root items afresh.  If that still fails, report it as
-			 * corruption.
-			 */
-			if (root_offsets[offnum - 1] == InvalidOffsetNumber)
+			if ((heapTuple->t_data->t_infomask2 & HEAP_INDEXED_UPDATED) != 0)
 			{
-				Page		page = BufferGetPage(hscan->rs_cbuf);
-
-				LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
-				heap_get_root_tuples(page, root_offsets);
-				LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+				/*
+				 * HOT-indexed (Selective Index Update) live tuple: index it
+				 * under its OWN TID, not the chain root.  Its indexed values
+				 * differ from earlier chain members', and the bitmap-overlap
+				 * read path keeps an entry only when no hop after the entry's
+				 * target changed the index's attributes.  That holds for an
+				 * entry pointing directly at the live tuple (no later hop);
+				 * an entry pointed at the root would be dropped as stale,
+				 * losing the row.
+				 */
+				ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
+							   offnum);
 			}
+			else
+			{
+				/*
+				 * If a HOT tuple points to a root that we don't know about,
+				 * obtain root items afresh.  If that still fails, report it
+				 * as corruption.
+				 */
+				if (root_offsets[offnum - 1] == InvalidOffsetNumber)
+				{
+					Page		page = BufferGetPage(hscan->rs_cbuf);
 
-			if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
-				ereport(ERROR,
-						(errcode(ERRCODE_DATA_CORRUPTED),
-						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
-										 ItemPointerGetBlockNumber(&heapTuple->t_self),
-										 offnum,
-										 RelationGetRelationName(heapRelation))));
+					LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
+					heap_get_root_tuples(page, root_offsets);
+					LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+				}
 
-			ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
-						   root_offsets[offnum - 1]);
+				if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
+					ereport(ERROR,
+							(errcode(ERRCODE_DATA_CORRUPTED),
+							 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
+											 ItemPointerGetBlockNumber(&heapTuple->t_self),
+											 offnum,
+											 RelationGetRelationName(heapRelation))));
+
+				ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
+							   root_offsets[offnum - 1]);
+			}
 
 			/* Call the AM's callback routine to process the tuple */
 			callback(indexRelation, &tid, values, isnull, tupleIsAlive,
@@ -1835,7 +1866,8 @@ heapam_index_validate_scan(Relation heapRelation,
 		rootTuple = *heapcursor;
 		root_offnum = ItemPointerGetOffsetNumber(heapcursor);
 
-		if (HeapTupleIsHeapOnly(heapTuple))
+		if (HeapTupleIsHeapOnly(heapTuple) &&
+			(heapTuple->t_data->t_infomask2 & HEAP_INDEXED_UPDATED) == 0)
 		{
 			root_offnum = root_offsets[root_offnum - 1];
 			if (!OffsetNumberIsValid(root_offnum))
@@ -2531,7 +2563,6 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
 
 	hscan->rs_cindex = 0;
 	hscan->rs_ntuples = 0;
-
 	/* Release buffer containing previous block. */
 	if (BufferIsValid(hscan->rs_cbuf))
 	{
@@ -2592,6 +2623,7 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
 		 * offset.
 		 */
 		int			curslot;
+		bool		page_had_hot_indexed = false;
 
 		/* We must have extracted the tuple offsets by now */
 		Assert(noffsets > -1);
@@ -2601,12 +2633,65 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
 			OffsetNumber offnum = offsets[curslot];
 			ItemPointerData tid;
 			HeapTupleData heapTuple;
+			bool		hot_indexed_stale = false;
 
 			ItemPointerSet(&tid, block, offnum);
 			if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
-									   &heapTuple, NULL, true))
-				hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
+									   &heapTuple, NULL, true,
+									   &hot_indexed_stale, NULL, NULL))
+			{
+				OffsetNumber resolved = ItemPointerGetOffsetNumber(&tid);
+				bool		already_have = false;
+
+				/*
+				 * A bitmap heap scan cannot attribute a TID to one index, so
+				 * any crossed in-chain HOT/SIU hop means the arriving entry
+				 * may be stale; recheck/dedup conservatively.
+				 */
+				if (hot_indexed_stale)
+					page_had_hot_indexed = true;
+
+				/*
+				 * With HOT-indexed updates, more than one bitmap entry on the
+				 * same block can chain-resolve to the same live tuple (a
+				 * stale old-key entry plus the fresh new-key entry, or
+				 * multiple stale entries from successive hot-indexed
+				 * updates).  Once we've seen any HOT-indexed hop on this
+				 * block, dedup inline so upper nodes (e.g., MERGE) don't see
+				 * the same row twice.  Preserve original insertion order:
+				 * MERGE's RETURNING ordering and test harness stability both
+				 * depend on it.  If no HOT-indexed hop was seen on the page we
+				 * skip the linear scan entirely -- the TBM's TIDs are already
+				 * distinct by construction.
+				 */
+				if (page_had_hot_indexed)
+				{
+					for (int j = 0; j < ntup; j++)
+					{
+						if (hscan->rs_vistuples[j] == resolved)
+						{
+							already_have = true;
+							break;
+						}
+					}
+				}
+
+				if (!already_have)
+					hscan->rs_vistuples[ntup++] = resolved;
+
+				/*
+				 * If we reached the visible tuple by crossing a HOT-indexed
+				 * (HOT/SIU) hop, the bitmap index entry that pointed us
+				 * at the chain root may describe key values the visible tuple
+				 * no longer has.  Force BitmapHeapScan to run its recheck
+				 * qual against these tuples even if the bitmap page was
+				 * otherwise exact.
+				 */
+				if (hot_indexed_stale)
+					*recheck = true;
+			}
 		}
+
 	}
 	else
 	{
diff --git a/src/backend/access/heap/heapam_indexscan.c b/src/backend/access/heap/heapam_indexscan.c
index 33d14f1de7d..e7e381d0501 100644
--- a/src/backend/access/heap/heapam_indexscan.c
+++ b/src/backend/access/heap/heapam_indexscan.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "access/heapam.h"
+#include "access/hot_indexed.h"
 #include "access/relscan.h"
 #include "storage/predicate.h"
 
@@ -35,6 +36,14 @@ heapam_index_fetch_begin(Relation rel, uint32 flags)
 	hscan->xs_blk = InvalidBlockNumber;
 	hscan->xs_vmbuffer = InvalidBuffer;
 
+	/*
+	 * Scratch space for the union of modified-attrs bitmaps that a HOT/SIU
+	 * chain walk crosses, sized for this relation's column count.  Threaded
+	 * back out through xs_hot_indexed_crossed for the index-access layer.
+	 */
+	hscan->xs_base.xs_hot_indexed_crossed =
+		palloc0(HotIndexedBitmapBytes(RelationGetNumberOfAttributes(rel)));
+
 	return &hscan->xs_base;
 }
 
@@ -63,6 +72,9 @@ heapam_index_fetch_end(IndexFetchTableData *scan)
 	if (BufferIsValid(hscan->xs_vmbuffer))
 		ReleaseBuffer(hscan->xs_vmbuffer);
 
+	if (hscan->xs_base.xs_hot_indexed_crossed != NULL)
+		pfree(hscan->xs_base.xs_hot_indexed_crossed);
+
 	pfree(hscan);
 }
 
@@ -83,13 +95,24 @@ heapam_index_fetch_end(IndexFetchTableData *scan)
  * globally dead; *all_dead is set true if all members of the HOT chain
  * are vacuumable, false if not.
  *
+ * If hot_indexed_recheck is not NULL, *hot_indexed_recheck is set true iff the
+ * walk crossed a HOT-selectively-updated (HOT/SIU) hop after the entry tuple
+ * on the way to the returned tuple -- i.e. the arriving index entry's stored
+ * key may no longer match the live tuple, so the caller must recheck it (via
+ * a leaf-key comparison or a qual recheck).  The entry tuple's own producing
+ * hop is excluded, so a fresh entry pointing directly at its tuple is not
+ * flagged.  When no such hop was crossed, *hot_indexed_recheck is left false.
+ *
  * Unlike heap_fetch, the caller must already have pin and (at least) share
  * lock on the buffer; it is still pinned/locked at exit.
  */
 bool
 heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
 					   Snapshot snapshot, HeapTuple heapTuple,
-					   bool *all_dead, bool first_call)
+					   bool *all_dead, bool first_call,
+					   bool *hot_indexed_recheck,
+					   uint8 *crossed_bitmap,
+					   bool *prefix_all_dead)
 {
 	Page		page = BufferGetPage(buffer);
 	TransactionId prev_xmax = InvalidTransactionId;
@@ -98,12 +121,27 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
 	bool		at_chain_start;
 	bool		valid;
 	bool		skip;
+	bool		prefix_dead = true;
 	GlobalVisState *vistest = NULL;
+	int			relnatts = RelationGetNumberOfAttributes(relation);
 
 	/* If this is not the first call, previous call returned a (live!) tuple */
 	if (all_dead)
 		*all_dead = first_call;
 
+	/*
+	 * On the first call, clear the recheck flag and the crossed-attrs union.
+	 * On subsequent calls (same chain continuing) keep whatever an earlier
+	 * hop already accumulated.
+	 */
+	if (first_call)
+	{
+		if (hot_indexed_recheck)
+			*hot_indexed_recheck = false;
+		if (crossed_bitmap)
+			memset(crossed_bitmap, 0, HotIndexedBitmapBytes(relnatts));
+	}
+
 	blkno = ItemPointerGetBlockNumber(tid);
 	offnum = ItemPointerGetOffsetNumber(tid);
 	at_chain_start = first_call;
@@ -130,7 +168,17 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
 			/* We should only see a redirect at start of chain */
 			if (ItemIdIsRedirected(lp) && at_chain_start)
 			{
-				/* Follow the redirect */
+				/*
+				 * Follow the redirect.  A collapsed dead prefix is preserved
+				 * as a run of forwarding stubs, each carrying its segment's
+				 * modified-attrs bitmap, ending at the first live tuple;
+				 * chain collapse reclaims a dead member only when its
+				 * attributes are a subset of the surviving later hops (see
+				 * pruneheap.c).  So the stubs and live hops this walk crosses
+				 * below contribute the complete union of every collapsed
+				 * hop's modified attributes, and that union drives the
+				 * overlap staleness test for the index-access layer.
+				 */
 				offnum = ItemIdGetRedirect(lp);
 				at_chain_start = false;
 				continue;
@@ -151,10 +199,95 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
 		ItemPointerSet(&heapTuple->t_self, blkno, offnum);
 
 		/*
-		 * Shouldn't see a HEAP_ONLY tuple at chain start.
+		 * A collapse-survivor stub is an LP_NORMAL item but not a real tuple:
+		 * it is a freeze-safe forwarding node carrying the modified-attrs
+		 * bitmap for the chain segment it represents.  Treat it like a
+		 * crossed HOT/SIU hop -- arm the recheck and OR its bitmap into the
+		 * crossed union (unless we arrived directly at it, in which case the
+		 * arriving entry already reflects this segment's value) -- then
+		 * follow its forward link.  A stub is never visible and never
+		 * returned, and its forward link is a logical, not xid-continuous,
+		 * edge, so reset prev_xmax to skip the chain-integrity check on the
+		 * next member.
+		 */
+		if (HotIndexedHeaderIsStub(heapTuple->t_data))
+		{
+			if (!at_chain_start)
+			{
+				if (hot_indexed_recheck)
+					*hot_indexed_recheck = true;
+				if (crossed_bitmap)
+				{
+					int			bmnatts =
+						HotIndexedTupleBitmapNatts(heapTuple->t_data);
+
+					HotIndexedBitmapUnion(crossed_bitmap,
+										  HotIndexedGetModifiedBitmap(heapTuple->t_data,
+																	  heapTuple->t_len,
+																	  bmnatts),
+										  bmnatts);
+				}
+			}
+			offnum = HotIndexedStubGetForward(heapTuple->t_data);
+			at_chain_start = false;
+			prev_xmax = InvalidTransactionId;
+			continue;
+		}
+
+		/*
+		 * Shouldn't see a HEAP_ONLY tuple at chain start, unless that tuple
+		 * is the target of a freshly-inserted hot-indexed index entry: then
+		 * arriving directly at a heap-only HOT-indexed tuple is legal and the
+		 * tuple is the canonical visible version, so we fall through and
+		 * apply normal visibility checks to it.  Otherwise, treat it as a
+		 * broken chain.
 		 */
 		if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
-			break;
+		{
+			if ((heapTuple->t_data->t_infomask2 & HEAP_INDEXED_UPDATED) == 0)
+				break;
+
+			/*
+			 * We were pointed directly at this hot-indexed tuple.  The index
+			 * entry we arrived through was inserted *for* this update, so it
+			 * reflects this tuple's current attribute values; its own
+			 * producing hop is not a crossed hop, so it is not flagged for
+			 * recheck (a fresh entry is never stale for its own index).
+			 */
+		}
+		else if (hot_indexed_recheck != NULL &&
+				 (heapTuple->t_data->t_infomask2 & HEAP_INDEXED_UPDATED) != 0)
+		{
+			/*
+			 * A HOT/SIU hop reached by following the chain (or a redirect)
+			 * from an earlier entry: this hop is crossed, so the arriving
+			 * entry's stored key may no longer match the live tuple.  Set the
+			 * recheck flag to tell the index-access layer to consult the
+			 * crossed-attrs union; that union (accumulated below) is what
+			 * decides staleness.
+			 */
+			*hot_indexed_recheck = true;
+
+			/*
+			 * Accumulate this hop's modified-attrs bitmap into the crossed
+			 * union.  A tuple's inline bitmap records the indexed attributes
+			 * that changed at the hop INTO it, which is exactly the hop we
+			 * just crossed by advancing to it; ORing each crossed hop yields
+			 * the indexed attributes that changed after the entry's own
+			 * tuple.
+			 */
+			if (crossed_bitmap)
+			{
+				int			bmnatts =
+					HotIndexedTupleBitmapNatts(heapTuple->t_data);
+
+				HotIndexedBitmapUnion(crossed_bitmap,
+									  HotIndexedGetModifiedBitmap(heapTuple->t_data,
+																  heapTuple->t_len,
+																  bmnatts),
+									  bmnatts);
+			}
+		}
 
 		/*
 		 * The xmin should match the previous xmax value, else chain is
@@ -186,6 +319,15 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
 								 HeapTupleHeaderGetXmin(heapTuple->t_data));
 				if (all_dead)
 					*all_dead = false;
+
+				/*
+				 * Report whether every chain member skipped before this
+				 * visible tuple is dead to all transactions.  With a stale
+				 * verdict this lets the caller kill the arriving leaf safely.
+				 */
+				if (prefix_all_dead)
+					*prefix_all_dead = prefix_dead;
+
 				return true;
 			}
 		}
@@ -194,18 +336,25 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
 		/*
 		 * If we can't see it, maybe no one else can either.  At caller
 		 * request, check whether all chain members are dead to all
-		 * transactions.
+		 * transactions.  The same surely-dead test feeds prefix_dead, which
+		 * (unlike all_dead) is not reset when a visible tuple is found, so it
+		 * records whether the members skipped ahead of the returned tuple are
+		 * all dead to all -- the safe-to-kill-this-leaf condition.
 		 *
 		 * Note: if you change the criterion here for what is "dead", fix the
 		 * planner's get_actual_variable_range() function to match.
 		 */
-		if (all_dead && *all_dead)
+		if ((all_dead && *all_dead) || prefix_dead)
 		{
 			if (!vistest)
 				vistest = GlobalVisTestFor(relation);
 
 			if (!HeapTupleIsSurelyDead(heapTuple, vistest))
-				*all_dead = false;
+			{
+				if (all_dead)
+					*all_dead = false;
+				prefix_dead = false;
+			}
 		}
 
 		/*
@@ -273,7 +422,15 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
 											snapshot,
 											&bslot->base.tupdata,
 											all_dead,
-											!*heap_continue);
+											!*heap_continue,
+											&scan->xs_hot_indexed_recheck,
+											scan->xs_hot_indexed_crossed,
+											&scan->xs_prefix_all_dead);
+	if (!got_heap_tuple)
+	{
+		scan->xs_hot_indexed_recheck = false;
+		scan->xs_prefix_all_dead = false;
+	}
 	bslot->base.tupdata.t_self = *tid;
 	LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
 
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index 1408989c568..6628f9bf85d 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -103,6 +103,9 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
 		scan->orderByData = NULL;
 
 	scan->xs_want_itup = false; /* may be set later */
+	scan->xs_index_only = false;	/* may be set later */
+
+	scan->xs_hot_indexed_stale = false;
 
 	/*
 	 * During recovery we ignore killed tuples and don't bother to kill them
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 7967e939847..3c08136f630 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -44,6 +44,7 @@
 #include "postgres.h"
 
 #include "access/amapi.h"
+#include "access/hot_indexed.h"
 #include "access/relation.h"
 #include "access/reloptions.h"
 #include "access/relscan.h"
@@ -288,6 +289,11 @@ index_beginscan(Relation heapRelation,
 	/* prepare to fetch index matches from table */
 	scan->xs_heapfetch = table_index_fetch_begin(heapRelation, flags);
 
+	/*
+	 * Staleness of a HOT-indexed entry is decided by the crossed-attribute
+	 * bitmap (see index_fetch_heap), so scans do not need the leaf key.
+	 */
+
 	return scan;
 }
 
@@ -606,6 +612,15 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
 	/* XXX: we should assert that a snapshot is pushed or registered */
 	Assert(TransactionIdIsValid(RecentXmin));
 
+	/*
+	 * Reset the HOT-indexed stale flag: it is computed in index_fetch_heap
+	 * from the heap AM's recheck signal and is per-fetched-tuple, not
+	 * per-index-entry. For IndexOnlyScan, which may skip index_fetch_heap when
+	 * the VM says the entry is visible-to-all, this ensures we don't carry a
+	 * stale value from a previous entry.
+	 */
+	scan->xs_hot_indexed_stale = false;
+
 	/*
 	 * The AM's amgettuple proc finds the next index entry matching the scan
 	 * keys, and puts the TID into scan->xs_heaptid.  It should also set
@@ -666,15 +681,97 @@ index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot)
 	if (found)
 		pgstat_count_heap_fetch(scan->indexRelation);
 
+	/*
+	 * The table AM reported, via xs_hot_indexed_recheck, whether the walk to
+	 * the live tuple crossed a HOT-indexed hop after the arriving index
+	 * entry's own tuple.  When it did, the entry's stored key may no longer
+	 * agree with the live tuple, and we must decide whether to drop it.
+	 *
+	 * The crossed-attribute bitmap (xs_hot_indexed_crossed) is the staleness
+	 * authority.  It is the union of the per-hop modified-attribute bitmaps
+	 * of every hop the walk crossed, and it is complete: each crossed live
+	 * hop, collapse-survivor stub, and redirected (collapsed) prefix
+	 * contributes its segment's bitmap, and chain collapse only ever reclaims
+	 * a member whose attributes are a subset of the surviving later hops (see
+	 * pruneheap.c).  Therefore:
+	 *
+	 * - if the union is disjoint from the heap columns this index references,
+	 * none of the index's inputs changed across the chain, so the entry's key
+	 * still matches the live tuple: keep it; and
+	 *
+	 * - if the union overlaps them, one of this index's key columns changed
+	 * after the entry's own tuple, so the entry is stale: drop it.
+	 *
+	 * Dropping on overlap is correct even when the key was cycled away and
+	 * back to its original value (an ABA update): the update that set the
+	 * value back created a fresh entry pointing at its own (live) tuple,
+	 * whose walk crosses no later key-changing hop, so that entry uniquely
+	 * supplies the row while this stale ancestor entry is dropped.  No
+	 * value-recheck is needed, so this works for any access method; the
+	 * staleness decision is purely attribute-based.
+	 */
+	scan->xs_hot_indexed_stale = false;
+	if (found &&
+		scan->xs_heapfetch->xs_hot_indexed_recheck &&
+		scan->xs_heapfetch->xs_hot_indexed_crossed != NULL)
+	{
+		Bitmapset  *idxattrs = RelationGetIndexedAttrs(scan->indexRelation);
+		int			x = -1;
+
+		while ((x = bms_next_member(idxattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = x + FirstLowInvalidHeapAttributeNumber;
+
+			/* the crossed bitmap records only user attributes */
+			if (attnum >= 1 &&
+				HotIndexedAttrIsModified(scan->xs_heapfetch->xs_hot_indexed_crossed,
+										 attnum))
+			{
+				scan->xs_hot_indexed_stale = true;
+				break;
+			}
+		}
+		bms_free(idxattrs);
+	}
+
 	/*
 	 * If we scanned a whole HOT chain and found only dead tuples, tell index
 	 * AM to kill its entry for that TID (this will take effect in the next
 	 * amgettuple call, in index_getnext_tid).  We do not do this when in
 	 * recovery because it may violate MVCC to do so.  See comments in
 	 * RelationGetIndexScan().
+	 *
+	 * Additionally kill a stale HOT-indexed leaf (one whose key the live
+	 * tuple no longer holds) when every chain member skipped before the
+	 * returned tuple is dead to all transactions (xs_prefix_all_dead): no
+	 * snapshot can reach a matching version through this leaf, so it is
+	 * redundant and reclaiming it bounds the index bloat HOT-indexed updates
+	 * create.
+	 *
+	 * Two independent conditions make this safe:
+	 *
+	 *  - The surely-dead prefix gate (xs_prefix_all_dead) means no snapshot,
+	 *    including older ones still running, can reach a version through this
+	 *    leaf whose key matches: every member ahead of the live tuple is dead
+	 *    to all.  This is what makes it MVCC-safe, exactly as for the
+	 *    all_dead case.
+	 *
+	 *  - The leaf is genuinely redundant, not the row's only entry.  A stale
+	 *    verdict means the crossed-hop union overlaps this index's columns,
+	 *    i.e. one of this index's attributes changed on a hop after this
+	 *    leaf's target.  The update that made that change maintained this
+	 *    index (its attribute changed), so it planted a fresh entry pointing
+	 *    at its own live tuple; that fresh entry crosses no later
+	 *    key-changing hop and uniquely supplies the row.  Dropping the stale
+	 *    ancestor therefore never removes the row's last reachable entry.
+	 *    This holds even under ABA key cycling (X -> Y -> X): the X-restoring
+	 *    update changed this index's column (Y -> X) and so planted the fresh
+	 *    entry.
 	 */
 	if (!scan->xactStartedInRecovery)
-		scan->kill_prior_tuple = all_dead;
+		scan->kill_prior_tuple =
+			all_dead ||
+			(scan->xs_hot_indexed_stale && scan->xs_heapfetch->xs_prefix_all_dead);
 
 	return found;
 }
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index c8af97dd23d..de8f4e7f02c 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -15,6 +15,8 @@
 
 #include "postgres.h"
 
+#include "access/genam.h"
+#include "access/htup_details.h"
 #include "access/nbtree.h"
 #include "access/nbtxlog.h"
 #include "access/tableam.h"
@@ -22,18 +24,24 @@
 #include "access/xloginsert.h"
 #include "common/int.h"
 #include "common/pg_prng.h"
+#include "executor/tuptable.h"
 #include "lib/qunique.h"
 #include "miscadmin.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
+#include "utils/datum.h"
 #include "utils/injection_point.h"
-
 /* Minimum tree height for application of fastpath optimization */
 #define BTREE_FASTPATH_MIN_LEVEL	2
 
 
 static BTStack _bt_search_insert(Relation rel, Relation heaprel,
 								 BTInsertState insertstate);
+
+/* Internal helper: HOT-indexed leaf-key staleness check for _bt_check_unique. */
+static bool _bt_heap_keys_equal_leaf(Relation rel, IndexTuple leaftup,
+									 struct TupleTableSlot *heapSlot);
+
 static TransactionId _bt_check_unique(Relation rel, BTInsertState insertstate,
 									  Relation heapRel,
 									  IndexUniqueCheck checkUnique, bool *is_unique,
@@ -426,6 +434,8 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
 	bool		inposting = false;
 	bool		prevalldead = true;
 	int			curposti = 0;
+	TupleTableSlot *chain_walk_slot = NULL;
+	bool		hi_recheck = false;
 
 	/* Assume unique until we find a duplicate */
 	*is_unique = true;
@@ -509,6 +519,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
 			{
 				ItemPointerData htid;
 				bool		all_dead = false;
+				bool		hot_indexed_stale = false;
 
 				if (!inposting)
 				{
@@ -559,13 +570,79 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
 				 * satisfying SnapshotDirty. This is necessary because for AMs
 				 * with optimizations like heap's HOT, we have just a single
 				 * index entry for the entire chain.
+				 *
+				 * The fetch reports (hi_recheck) whether the chain walk to
+				 * the live tuple crossed a HOT-selectively-updated (HOT/SIU)
+				 * hop. In classic HOT the chain preserves the index key, so a
+				 * live tuple anywhere in the chain is a definite conflict;
+				 * with HOT/SIU that invariant no longer holds -- an old index
+				 * entry for key K may chain-lead to a heap tuple whose actual
+				 * index key is K'.  When a hop was crossed we recheck the
+				 * leaf key against the live tuple below; a stale entry is
+				 * filtered out, not treated as a conflict.  chain_walk_slot
+				 * holds the live tuple for that recheck and is freed at every
+				 * exit.
 				 */
-				else if (table_index_fetch_tuple_check(heapRel, &htid,
+				else if ((chain_walk_slot != NULL ||
+						  (chain_walk_slot = table_slot_create(heapRel, NULL))) &&
+						 table_index_fetch_tuple_check(heapRel, &htid,
 													   &SnapshotDirty,
-													   &all_dead))
+													   &all_dead,
+													   &hi_recheck,
+													   chain_walk_slot))
 				{
 					TransactionId xwait;
 
+					/*
+					 * The chain walk reported (hi_recheck) that it crossed at
+					 * least one HOT/SIU hop on the way to the live tuple, so
+					 * the classic "live tuple in the chain implies the same
+					 * index key" invariant may not hold: an old index entry
+					 * for key K may chain-lead to a tuple whose current key
+					 * is K'.  Recheck the leaf's stored key against the live
+					 * tuple's current index form.  A mismatch means the leaf
+					 * is stale (not a conflict): skip it; the fresh entry
+					 * inserted for the current value is the canonical one.
+					 * Because the leaf still resolves to a live tuple, clear
+					 * prevalldead so the caller never marks it LP_DEAD
+					 * (killable).
+					 */
+					hot_indexed_stale =
+						(hi_recheck &&
+						 !_bt_heap_keys_equal_leaf(rel, curitup, chain_walk_slot));
+
+					if (hot_indexed_stale)
+					{
+						prevalldead = false;
+						if (nbuf != InvalidBuffer)
+							_bt_relbuf(rel, nbuf);
+						nbuf = InvalidBuffer;
+						ExecClearTuple(chain_walk_slot);
+						goto bt_chain_walk_skip;
+					}
+
+					/*
+					 * The leaf's key still matches the live tuple.  If the
+					 * chain walk crossed a HOT-indexed hop and resolved to
+					 * the very tuple the caller is inserting an entry for,
+					 * this is not a duplicate -- it is the same logical row
+					 * being re-indexed (e.g. a HOT-indexed UPDATE that left
+					 * this index's key unchanged, or a key cycled away and
+					 * back). Skip it rather than raising a spurious unique
+					 * violation.
+					 */
+					if (hi_recheck &&
+						ItemPointerCompare(&htid, &itup->t_tid) == 0)
+					{
+						if (nbuf != InvalidBuffer)
+							_bt_relbuf(rel, nbuf);
+						nbuf = InvalidBuffer;
+						ExecClearTuple(chain_walk_slot);
+						goto bt_chain_walk_skip;
+					}
+					if (chain_walk_slot != NULL)
+						ExecClearTuple(chain_walk_slot);
+
 					/*
 					 * It is a duplicate. If we are only doing a partial
 					 * check, then don't bother checking if the tuple is being
@@ -578,6 +655,8 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
 					{
 						if (nbuf != InvalidBuffer)
 							_bt_relbuf(rel, nbuf);
+						if (chain_walk_slot)
+							ExecDropSingleTupleTableSlot(chain_walk_slot);
 						*is_unique = false;
 						return InvalidTransactionId;
 					}
@@ -593,6 +672,8 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
 					{
 						if (nbuf != InvalidBuffer)
 							_bt_relbuf(rel, nbuf);
+						if (chain_walk_slot)
+							ExecDropSingleTupleTableSlot(chain_walk_slot);
 						/* Tell _bt_doinsert to wait... */
 						*speculativeToken = SnapshotDirty.speculativeToken;
 						/* Caller releases lock on buf immediately */
@@ -619,7 +700,8 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
 					 */
 					htid = itup->t_tid;
 					if (table_index_fetch_tuple_check(heapRel, &htid,
-													  SnapshotSelf, NULL))
+													  SnapshotSelf, NULL,
+													  NULL, NULL))
 					{
 						/* Normal case --- it's still live */
 					}
@@ -715,6 +797,9 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
 				 */
 				if (!all_dead && inposting)
 					prevalldead = false;
+
+		bt_chain_walk_skip:
+				;
 			}
 		}
 
@@ -782,9 +867,84 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
 	if (nbuf != InvalidBuffer)
 		_bt_relbuf(rel, nbuf);
 
+	if (chain_walk_slot)
+		ExecDropSingleTupleTableSlot(chain_walk_slot);
+
 	return InvalidTransactionId;
 }
 
+/*
+ *	_bt_heap_keys_equal_leaf() -- Compare a heap tuple's current btree key
+ *	against the key stored in a leaf IndexTuple.
+ *
+ *	The HOT-indexed unique-check path uses this to distinguish a live tuple
+ *	whose current key still matches the arriving leaf (a genuine conflict)
+ *	from a stale chain hit: with a HOT-indexed (Selective Index Update) chain
+ *	the leaf entry for an old key still resolves to the live tuple, whose
+ *	current index form may differ.
+ *
+ *	Equality must agree with the index's own notion of equality, because the
+ *	caller uses the verdict to decide whether to raise a unique violation.
+ *	We compare each key column with its btree ordering procedure (BTORDER_PROC,
+ *	the same support function _bt_mkscankey uses) under the column's collation
+ *	-- not a bitwise image comparison.  Bitwise equality would wrongly treat
+ *	opclass-equal but image-distinct values (numeric 1.0 vs 1.00, float -0.0
+ *	vs 0.0, text under a nondeterministic collation) as "not equal" and skip a
+ *	genuine duplicate.
+ *
+ *	This is called from _bt_check_unique while the leaf buffer is locked, so it
+ *	deliberately avoids executor machinery: it fetches each plain key
+ *	attribute straight from the slot.  The check runs for any unique index
+ *	whose leaf resolves through a HOT/SIU hop -- not only one receiving a
+ *	fresh entry -- so an index with expression key columns (keycol == 0) can
+ *	reach here.  HeapUpdateHotAllowable forces a non-HOT update whenever an
+ *	attribute referenced by an expression index changes, so an expression key
+ *	cannot differ between members of one chain; it is treated as equal.
+ *
+ *	heapSlot must already be populated by the caller (via
+ *	table_index_fetch_tuple / table_index_fetch_tuple_check).
+ */
+static bool
+_bt_heap_keys_equal_leaf(Relation rel, IndexTuple leaftup,
+						 struct TupleTableSlot *heapSlot)
+{
+	TupleDesc	indexDesc = RelationGetDescr(rel);
+	int			nkey = IndexRelationGetNumberOfKeyAttributes(rel);
+	Form_pg_index indexStruct = rel->rd_index;
+
+	Assert(leaftup != NULL);
+	Assert(heapSlot != NULL && !TTS_EMPTY(heapSlot));
+
+	for (int i = 0; i < nkey; i++)
+	{
+		AttrNumber	keycol = indexStruct->indkey.values[i];
+		Datum		heap_datum;
+		bool		heap_isnull;
+		Datum		leaf_datum;
+		bool		leaf_isnull;
+		FmgrInfo   *cmpproc;
+
+		/* Expression key: unchanged within a chain (see header), so equal. */
+		if (keycol == 0)
+			continue;
+
+		heap_datum = slot_getattr(heapSlot, keycol, &heap_isnull);
+		leaf_datum = index_getattr(leaftup, i + 1, indexDesc, &leaf_isnull);
+
+		if (heap_isnull != leaf_isnull)
+			return false;
+		if (heap_isnull)
+			continue;
+
+		/* opclass 3-way compare under the column's collation; 0 == equal */
+		cmpproc = index_getprocinfo(rel, i + 1, BTORDER_PROC);
+		if (DatumGetInt32(FunctionCall2Coll(cmpproc, rel->rd_indcollation[i],
+											heap_datum, leaf_datum)) != 0)
+			return false;
+	}
+
+	return true;
+}
 
 /*
  *	_bt_findinsertloc() -- Finds an insert location for a tuple
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 3df2c752ead..f558dd0156e 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -39,6 +39,7 @@
 #include "utils/wait_event.h"
 
 
+
 /*
  * BTPARALLEL_NOT_INITIALIZED indicates that the scan has not started.
  *
@@ -408,6 +409,16 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 	 * race condition involving VACUUM setting pages all-visible in the VM.
 	 * It's also unsafe for plain index scans that use a non-MVCC snapshot.
 	 *
+	 * Note that wanting the index tuple (xs_want_itup) is not by itself a
+	 * reason to retain the pin: btree copies each returned IndexTuple into
+	 * so->currTuples (scan-local memory) and points xs_itup there, so the
+	 * tuple stays valid after the pin is dropped.  Only genuine index-only
+	 * scans (xs_index_only), which may return a tuple without fetching the
+	 * heap and therefore rely on the VM, must keep the pin.  A plain index
+	 * scan that sets xs_want_itup merely to inspect or recheck the index
+	 * tuple still fetches and visibility-checks the heap, so it has no VM
+	 * race and may drop pins like any other plain scan.
+	 *
 	 * Also opt out of dropping leaf page pins eagerly during bitmap scans.
 	 * Pins cannot be held for more than an instant during bitmap scans either
 	 * way, so we might as well avoid wasting cycles on acquiring page LSNs.
@@ -416,7 +427,7 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 	 *
 	 * Note: so->dropPin should never change across rescans.
 	 */
-	so->dropPin = (!scan->xs_want_itup &&
+	so->dropPin = (!scan->xs_index_only &&
 				   IsMVCCLikeSnapshot(scan->xs_snapshot) &&
 				   scan->heapRelation != NULL);
 
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 12c2674cbd7..f05cabc8c65 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -242,19 +242,31 @@ bool
 table_index_fetch_tuple_check(Relation rel,
 							  ItemPointer tid,
 							  Snapshot snapshot,
-							  bool *all_dead)
+							  bool *all_dead,
+							  bool *hot_indexed_recheck_out,
+							  TupleTableSlot *keep_slot)
 {
 	IndexFetchTableData *scan;
 	TupleTableSlot *slot;
 	bool		call_again = false;
 	bool		found;
 
-	slot = table_slot_create(rel, NULL);
+	slot = keep_slot ? keep_slot : table_slot_create(rel, NULL);
 	scan = table_index_fetch_begin(rel, SO_NONE);
 	found = table_index_fetch_tuple(scan, tid, snapshot, slot, &call_again,
 									all_dead);
+
+	/*
+	 * Surface the table AM's HOT/SIU recheck signal to the caller (the index
+	 * AM, which rechecks the arriving leaf key against the live tuple); the
+	 * scan is freed below, so copy it out.
+	 */
+	if (hot_indexed_recheck_out != NULL)
+		*hot_indexed_recheck_out = found && scan->xs_hot_indexed_recheck;
+
 	table_index_fetch_end(scan);
-	ExecDropSingleTupleTableSlot(slot);
+	if (keep_slot == NULL)
+		ExecDropSingleTupleTableSlot(slot);
 
 	return found;
 }
@@ -361,8 +373,7 @@ void
 simple_table_tuple_update(Relation rel, ItemPointer otid,
 						  TupleTableSlot *slot,
 						  Snapshot snapshot,
-						  const Bitmapset *modified_idx_attrs,
-						  TU_UpdateIndexes *update_indexes)
+						  Bitmapset **modified_attrs)
 {
 	TM_Result	result;
 	TM_FailureData tmfd;
@@ -373,8 +384,7 @@ simple_table_tuple_update(Relation rel, ItemPointer otid,
 								0, snapshot, InvalidSnapshot,
 								true /* wait for commit */ ,
 								&tmfd, &lockmode,
-								modified_idx_attrs,
-								update_indexes);
+								modified_attrs);
 
 	switch (result)
 	{
diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c
index fd7d2ec0e3a..7eae2d66a08 100644
--- a/src/backend/catalog/indexing.c
+++ b/src/backend/catalog/indexing.c
@@ -18,11 +18,14 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "catalog/index.h"
 #include "catalog/indexing.h"
 #include "executor/executor.h"
+#include "nodes/bitmapset.h"
 #include "utils/rel.h"
+#include "utils/relcache.h"
 
 
 /*
@@ -69,11 +72,16 @@ CatalogCloseIndexes(CatalogIndexState indstate)
  *
  * This should be called for each inserted or updated catalog tuple.
  *
- * This is effectively a cut-down version of ExecInsertIndexTuples.
+ * This is effectively a cut-down version of ExecInsertIndexTuples.  For
+ * UPDATE paths the caller supplies update_all_indexes (from
+ * table_tuple_update / simple_heap_update) so we can tell which indexes actually need a new entry:
+ * update_all_indexes is true for a fresh insert or a non-HOT update (every
+ * index gets an entry), false for a classic-HOT catalog update (non-summarizing
+ * indexes are skipped, since their existing entries still resolve the chain).
  */
 static void
 CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple,
-				   TU_UpdateIndexes updateIndexes)
+				   bool update_all_indexes)
 {
 	int			i;
 	int			numIndexes;
@@ -83,20 +91,6 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple,
 	IndexInfo **indexInfoArray;
 	Datum		values[INDEX_MAX_KEYS];
 	bool		isnull[INDEX_MAX_KEYS];
-	bool		onlySummarized = (updateIndexes == TU_Summarizing);
-
-	/*
-	 * HOT update does not require index inserts. But with asserts enabled we
-	 * want to check that it'd be legal to currently insert into the
-	 * table/index.
-	 */
-#ifndef USE_ASSERT_CHECKING
-	if (HeapTupleIsHeapOnly(heapTuple) && !onlySummarized)
-		return;
-#endif
-
-	/* When only updating summarized indexes, the tuple has to be HOT. */
-	Assert((!onlySummarized) || HeapTupleIsHeapOnly(heapTuple));
 
 	/*
 	 * Get information from the state structure.  Fall out if nothing to do.
@@ -120,6 +114,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple,
 	{
 		IndexInfo  *indexInfo;
 		Relation	index;
+		bool		index_unchanged;
 
 		indexInfo = indexInfoArray[i];
 		index = relationDescs[i];
@@ -138,22 +133,20 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple,
 		Assert(index->rd_index->indimmediate);
 		Assert(indexInfo->ii_NumIndexKeyAttrs != 0);
 
-		/* see earlier check above */
-#ifdef USE_ASSERT_CHECKING
-		if (HeapTupleIsHeapOnly(heapTuple) && !onlySummarized)
-		{
-			Assert(!ReindexIsProcessingIndex(RelationGetRelid(index)));
-			continue;
-		}
-#endif							/* USE_ASSERT_CHECKING */
-
 		/*
-		 * Skip insertions into non-summarizing indexes if we only need to
-		 * update summarizing indexes.
+		 * Decide whether this index needs a new entry.  On INSERT or a
+		 * non-HOT update (update_all_indexes) every index gets one.  On a
+		 * classic-HOT catalog update no indexed attribute changed, so the
+		 * non-summarizing indexes are skipped (summarizing indexes always get
+		 * a chance to update their block-level summaries below).
 		 */
-		if (onlySummarized && !indexInfo->ii_Summarizing)
+		index_unchanged = !update_all_indexes;
+		indexInfo->ii_IndexUnchanged = index_unchanged;
+
+		if (index_unchanged && !indexInfo->ii_Summarizing)
 			continue;
 
+
 		/*
 		 * FormIndexDatum fills in its values and isnull parameters with the
 		 * appropriate values for the column(s) of the index.
@@ -240,7 +233,7 @@ CatalogTupleInsert(Relation heapRel, HeapTuple tup)
 
 	simple_heap_insert(heapRel, tup);
 
-	CatalogIndexInsert(indstate, tup, TU_All);
+	CatalogIndexInsert(indstate, tup, true);
 	CatalogCloseIndexes(indstate);
 }
 
@@ -260,7 +253,7 @@ CatalogTupleInsertWithInfo(Relation heapRel, HeapTuple tup,
 
 	simple_heap_insert(heapRel, tup);
 
-	CatalogIndexInsert(indstate, tup, TU_All);
+	CatalogIndexInsert(indstate, tup, true);
 }
 
 /*
@@ -291,7 +284,7 @@ CatalogTuplesMultiInsertWithInfo(Relation heapRel, TupleTableSlot **slot,
 
 		tuple = ExecFetchSlotHeapTuple(slot[i], true, &should_free);
 		tuple->t_tableOid = slot[i]->tts_tableOid;
-		CatalogIndexInsert(indstate, tuple, TU_All);
+		CatalogIndexInsert(indstate, tuple, true);
 
 		if (should_free)
 			heap_freetuple(tuple);
@@ -313,15 +306,15 @@ void
 CatalogTupleUpdate(Relation heapRel, const ItemPointerData *otid, HeapTuple tup)
 {
 	CatalogIndexState indstate;
-	TU_UpdateIndexes updateIndexes = TU_All;
+	bool		update_all_indexes;
 
 	CatalogTupleCheckConstraints(heapRel, tup);
 
 	indstate = CatalogOpenIndexes(heapRel);
 
-	simple_heap_update(heapRel, otid, tup, &updateIndexes);
+	simple_heap_update(heapRel, otid, tup, &update_all_indexes);
 
-	CatalogIndexInsert(indstate, tup, updateIndexes);
+	CatalogIndexInsert(indstate, tup, update_all_indexes);
 	CatalogCloseIndexes(indstate);
 }
 
@@ -337,13 +330,13 @@ void
 CatalogTupleUpdateWithInfo(Relation heapRel, const ItemPointerData *otid, HeapTuple tup,
 						   CatalogIndexState indstate)
 {
-	TU_UpdateIndexes updateIndexes = TU_All;
+	bool		update_all_indexes;
 
 	CatalogTupleCheckConstraints(heapRel, tup);
 
-	simple_heap_update(heapRel, otid, tup, &updateIndexes);
+	simple_heap_update(heapRel, otid, tup, &update_all_indexes);
 
-	CatalogIndexInsert(indstate, tup, updateIndexes);
+	CatalogIndexInsert(indstate, tup, update_all_indexes);
 }
 
 /*
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index 4aa52a4bd25..e0bc01f63d3 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -307,8 +307,6 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
 	indexInfo->ii_Unique = true;
 	indexInfo->ii_NullsNotDistinct = false;
 	indexInfo->ii_ReadyForInserts = true;
-	indexInfo->ii_CheckedUnchanged = false;
-	indexInfo->ii_IndexUnchanged = false;
 	indexInfo->ii_Concurrent = false;
 	indexInfo->ii_BrokenHotChain = false;
 	indexInfo->ii_ParallelWorkers = 0;
diff --git a/src/backend/commands/repack.c b/src/backend/commands/repack.c
index bf45c5cf826..13e8762e7f4 100644
--- a/src/backend/commands/repack.c
+++ b/src/backend/commands/repack.c
@@ -2678,14 +2678,14 @@ apply_concurrent_update(Relation rel, TupleTableSlot *spilled_tuple,
 {
 	LockTupleMode lockmode;
 	TM_FailureData tmfd;
-	TU_UpdateIndexes update_indexes;
 	Bitmapset  *modified_idx_attrs;
 	TM_Result	res;
 
 	/*
 	 * Compute the set of modified indexed attributes by comparing the old
-	 * (ondisk) and new (spilled) tuples; heap_update needs it for a correct
-	 * HOT decision (a NULL set would look like "no indexed column changed").
+	 * (ondisk) and new (spilled) tuples.  heap_update needs this to make a
+	 * correct HOT decision; without it modified_idx_attrs would be NULL and
+	 * heap_update would always treat the update as HOT-eligible.
 	 */
 	modified_idx_attrs = ExecUpdateModifiedIdxAttrs(chgcxt->cc_rri,
 													ondisk_tuple,
@@ -2700,29 +2700,33 @@ apply_concurrent_update(Relation rel, TupleTableSlot *spilled_tuple,
 							 InvalidSnapshot,
 							 InvalidSnapshot,
 							 false,
-							 &tmfd, &lockmode, modified_idx_attrs, &update_indexes);
+							 &tmfd, &lockmode,
+							 &modified_idx_attrs);
 	if (res != TM_Ok)
 		ereport(ERROR,
 				errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 				errmsg("could not apply concurrent %s on relation \"%s\"",
 					   "UPDATE", RelationGetRelationName(rel)));
 
-	if (update_indexes != TU_None)
+	if (chgcxt->cc_rri->ri_NumIndices > 0 &&
+		!bms_is_empty(modified_idx_attrs))
 	{
-		uint32		flags = EIIT_IS_UPDATE;
+		bool		all_indexes =
+			bms_is_member(TableTupleUpdateAllIndexes, modified_idx_attrs);
 
-		if (update_indexes == TU_Summarizing)
-			flags |= EIIT_ONLY_SUMMARIZING;
+		ExecSetIndexUnchanged(chgcxt->cc_rri, modified_idx_attrs);
 		ExecInsertIndexTuples(chgcxt->cc_rri,
 							  chgcxt->cc_estate,
-							  flags,
+							  EIIT_IS_UPDATE |
+							  (all_indexes ?
+							   0 : EIIT_IS_HOT_INDEXED),
 							  spilled_tuple,
 							  NIL, NULL);
 	}
 
-	pgstat_progress_incr_param(PROGRESS_REPACK_HEAP_TUPLES_UPDATED, 1);
-
 	bms_free(modified_idx_attrs);
+
+	pgstat_progress_incr_param(PROGRESS_REPACK_HEAP_TUPLES_UPDATED, 1);
 }
 
 static void
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index eb383812901..5328db877cd 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -113,11 +113,13 @@
 #include "catalog/index.h"
 #include "executor/executor.h"
 #include "nodes/nodeFuncs.h"
+#include "pgstat.h"
 #include "storage/lmgr.h"
 #include "utils/injection_point.h"
 #include "utils/lsyscache.h"
 #include "utils/multirangetypes.h"
 #include "utils/rangetypes.h"
+#include "utils/rel.h"
 #include "utils/snapmgr.h"
 
 /* waitMode argument to check_exclusion_or_unique_constraint() */
@@ -140,11 +142,6 @@ static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
 static bool index_recheck_constraint(Relation index, const Oid *constr_procs,
 									 const Datum *existing_values, const bool *existing_isnull,
 									 const Datum *new_values);
-static bool index_unchanged_by_update(ResultRelInfo *resultRelInfo,
-									  EState *estate, IndexInfo *indexInfo,
-									  Relation indexRelation);
-static bool index_expression_changed_walker(Node *node,
-											Bitmapset *allUpdatedCols);
 static void ExecWithoutOverlapsNotEmpty(Relation rel, NameData attname, Datum attval,
 										char typtype, Oid atttypid);
 
@@ -277,24 +274,12 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
  *		into all the relations indexing the result relation
  *		when a heap tuple is inserted into the result relation.
  *
- *		When EIIT_IS_UPDATE is set and EIIT_ONLY_SUMMARIZING isn't,
- *		executor is performing an UPDATE that could not use an
- *		optimization like heapam's HOT (in more general terms a
- *		call to table_tuple_update() took place and set
- *		'update_indexes' to TU_All).  Receiving this hint makes
- *		us consider if we should pass down the 'indexUnchanged'
- *		hint in turn.  That's something that we figure out for
- *		each index_insert() call iff EIIT_IS_UPDATE is set.
- *		(When that flag is not set we already know not to pass the
- *		hint to any index.)
- *
- *		If EIIT_ONLY_SUMMARIZING is set, an equivalent optimization to
- *		HOT has been applied and any updated columns are indexed
- *		only by summarizing indexes (or in more general terms a
- *		call to table_tuple_update() took place and set
- *		'update_indexes' to TU_Summarizing). We can (and must)
- *		therefore only update the indexes that have
- *		'amsummarizing' = true.
+ *		When EIIT_IS_UPDATE is set, the executor is performing an
+ *		UPDATE.  The per-index ii_IndexUnchanged flag (populated by
+ *		ExecSetIndexUnchanged()) indicates whether each index's key
+ *		values are unchanged by this update.  When ii_IndexUnchanged
+ *		is true, we pass indexUnchanged=true to index_insert() as a
+ *		hint for bottom-up deletion optimization.
  *
  *		Unique and exclusion constraints are enforced at the same
  *		time.  This returns a list of index OIDs for any unique or
@@ -370,11 +355,41 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
 			continue;
 
 		/*
-		 * Skip processing of non-summarizing indexes if we only update
-		 * summarizing indexes
+		 * UPDATE skip rule.  ExecSetIndexUnchanged populated
+		 * ii_IndexNeedsUpdate for every index: true when the table AM stored
+		 * an independent new version, or when any attribute the index
+		 * references (key, INCLUDE, expression, or partial-predicate column)
+		 * overlaps the modified-attrs bitmap.  When it is false on a
+		 * non-summarizing index we skip the insert entirely; the HOT chain
+		 * keeps existing entries pointing at the chain root.  Summarizing
+		 * indexes always get a chance to update their block-level summaries.
 		 */
-		if ((flags & EIIT_ONLY_SUMMARIZING) && !indexInfo->ii_Summarizing)
+		if ((flags & EIIT_IS_UPDATE) &&
+			!indexInfo->ii_IndexNeedsUpdate &&
+			!indexInfo->ii_Summarizing)
+		{
+			/*
+			 * This index is skipped because no attribute it references
+			 * changed.  When the overall update is a HOT-indexed update (some
+			 * other non-summarizing index did change), record the skip on
+			 * this index's pgstat entry.  A classic-HOT update (no indexed
+			 * attribute changed) does not reach this path --
+			 * ExecInsertIndexTuples is only invoked when at least one index
+			 * needs a fresh entry.
+			 */
+			if (flags & EIIT_IS_HOT_INDEXED)
+				pgstat_count_hot_indexed_upd_skipped(indexRelation);
 			continue;
+		}
+
+		/*
+		 * Non-skipped index under a HOT-indexed update: this index is
+		 * receiving a fresh entry because an attribute it references changed.
+		 * Summarizing indexes always insert regardless of the HOT-indexed
+		 * decision (same as classic HOT), so they are not counted here.
+		 */
+		if ((flags & EIIT_IS_HOT_INDEXED) && !indexInfo->ii_Summarizing)
+			pgstat_count_hot_indexed_upd_matched(indexRelation);
 
 		/* Check for partial index */
 		if (indexInfo->ii_Predicate != NIL)
@@ -436,15 +451,13 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
 			checkUnique = UNIQUE_CHECK_PARTIAL;
 
 		/*
-		 * There's definitely going to be an index_insert() call for this
-		 * index.  If we're being called as part of an UPDATE statement,
-		 * consider if the 'indexUnchanged' = true hint should be passed.
+		 * For UPDATE operations, use the per-index ii_IndexUnchanged flag
+		 * (populated by ExecSetIndexUnchanged) to hint whether the index
+		 * values are unchanged.  This helps the index AM optimize for
+		 * bottom-up deletion of duplicate index entries.
 		 */
-		indexUnchanged = ((flags & EIIT_IS_UPDATE) &&
-						  index_unchanged_by_update(resultRelInfo,
-													estate,
-													indexInfo,
-													indexRelation));
+		indexUnchanged = (flags & EIIT_IS_UPDATE) ?
+			indexInfo->ii_IndexUnchanged : false;
 
 		satisfiesConstraint =
 			index_insert(indexRelation, /* index relation */
@@ -721,6 +734,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index,
 	int			i;
 	bool		conflict;
 	bool		found_self;
+	bool		found_self_siu_hit;
 	ExprContext *econtext;
 	TupleTableSlot *existing_slot;
 	TupleTableSlot *save_scantuple;
@@ -823,6 +837,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index,
 retry:
 	conflict = false;
 	found_self = false;
+	found_self_siu_hit = false;
 	index_scan = index_beginscan(heap, index,
 								 &DirtySnapshot, NULL, indnkeyatts, 0,
 								 SO_NONE);
@@ -838,14 +853,28 @@ retry:
 		char	   *error_existing;
 
 		/*
-		 * Ignore the entry for the tuple we're trying to check.
+		 * Ignore the entry for the tuple we're trying to check.  With
+		 * HOT-indexed updates, several index entries may chain-lead
+		 * to the same heap tuple (a stale entry for the old key and a fresh
+		 * entry for the new key).  They all resolve to the same TID here and
+		 * must all be treated as "self", not as a duplicate error.  We
+		 * tolerate the duplicate self arrival whenever *either* this
+		 * iteration or an earlier one saw xs_hot_indexed_stale -- the
+		 * canonical direct entry and the stale chain-walk entries can arrive
+		 * in either order.
 		 */
 		if (ItemPointerIsValid(tupleid) &&
 			ItemPointerEquals(tupleid, &existing_slot->tts_tid))
 		{
-			if (found_self)		/* should not happen */
+			if (index_scan->xs_hot_indexed_stale)
+				found_self_siu_hit = true;
+			if (found_self)
+			{
+				if (found_self_siu_hit)
+					continue;
 				elog(ERROR, "found self tuple multiple times in index \"%s\"",
 					 RelationGetRelationName(index));
+			}
 			found_self = true;
 			continue;
 		}
@@ -869,6 +898,31 @@ retry:
 								 * conflict */
 		}
 
+		/*
+		 * HOT-indexed chains can reach this loop via a stale btree leaf entry
+		 * whose key is different from the heap tuple's current index-form.
+		 * existing_values holds the current heap tuple's index-form
+		 * (FormIndexDatum above).  Compare it against our new tuple's values
+		 * using the same constraint operators; if they don't agree, the
+		 * chain-walked tuple is not actually in conflict with our insertion
+		 * -- it just shared a TID with a stale leaf entry we happened to scan
+		 * through.  Skip it.
+		 *
+		 * This mirrors _bt_check_unique's HOT-indexed recheck path; for
+		 * exclusion constraints the user-supplied operator in constr_procs
+		 * replaces the btree equality comparator, and
+		 * index_recheck_constraint does the right thing for either.
+		 */
+		if (index_scan->xs_hot_indexed_stale)
+		{
+			if (!index_recheck_constraint(index,
+										  constr_procs,
+										  existing_values,
+										  existing_isnull,
+										  values))
+				continue;		/* stale chain hit, not a real conflict */
+		}
+
 		/*
 		 * At this point we have either a conflict or a potential conflict.
 		 *
@@ -1009,149 +1063,94 @@ index_recheck_constraint(Relation index, const Oid *constr_procs,
 }
 
 /*
- * Check if ExecInsertIndexTuples() should pass indexUnchanged hint.
+ * ExecSetIndexUnchanged
+ *
+ * Populate two per-index flags ahead of ExecInsertIndexTuples:
+ *
+ *   - ii_IndexNeedsUpdate (wide) drives the skip decision.  It is true when
+ *     the table AM stored an independent new version (whole-row attribute
+ *     present in modified_idx_attrs) or when any attribute the index
+ *     references -- key, INCLUDE, expression, or partial-predicate column,
+ *     per RelationGetIndexedAttrs() -- changed.  A non-summarizing index for
+ *     which this is false is skipped: its existing entry keeps resolving the
+ *     HOT chain.
  *
- * When the executor performs an UPDATE that requires a new round of index
- * tuples, determine if we should pass 'indexUnchanged' = true hint for one
- * single index.
+ *   - ii_IndexUnchanged (narrow) is the indexUnchanged hint to aminsert,
+ *     consumed by nbtree deduplication / bottom-up deletion.  Per the
+ *     historical rule it counts only key columns; INCLUDE and predicate
+ *     columns are deliberately ignored, and an expression key is treated
+ *     conservatively as possibly changed.
  */
-static bool
-index_unchanged_by_update(ResultRelInfo *resultRelInfo, EState *estate,
-						  IndexInfo *indexInfo, Relation indexRelation)
+void
+ExecSetIndexUnchanged(ResultRelInfo *resultRelInfo,
+					  const Bitmapset *modified_idx_attrs)
 {
-	Bitmapset  *updatedCols;
-	Bitmapset  *extraUpdatedCols;
-	Bitmapset  *allUpdatedCols;
-	bool		hasexpression = false;
-	List	   *idxExprs;
-
-	/*
-	 * Check cache first
-	 */
-	if (indexInfo->ii_CheckedUnchanged)
-		return indexInfo->ii_IndexUnchanged;
-	indexInfo->ii_CheckedUnchanged = true;
-
-	/*
-	 * Check for indexed attribute overlap with updated columns.
-	 *
-	 * Only do this for key columns.  A change to a non-key column within an
-	 * INCLUDE index should not be counted here.  Non-key column values are
-	 * opaque payload state to the index AM, a little like an extra table TID.
-	 *
-	 * Note that row-level BEFORE triggers won't affect our behavior, since
-	 * they don't affect the updatedCols bitmaps generally.  It doesn't seem
-	 * worth the trouble of checking which attributes were changed directly.
-	 */
-	updatedCols = ExecGetUpdatedCols(resultRelInfo, estate);
-	extraUpdatedCols = ExecGetExtraUpdatedCols(resultRelInfo, estate);
-	for (int attr = 0; attr < indexInfo->ii_NumIndexKeyAttrs; attr++)
-	{
-		int			keycol = indexInfo->ii_IndexAttrNumbers[attr];
-
-		if (keycol <= 0)
-		{
-			/*
-			 * Skip expressions for now, but remember to deal with them later
-			 * on
-			 */
-			hasexpression = true;
-			continue;
-		}
+	int			numIndices = resultRelInfo->ri_NumIndices;
+	IndexInfo **indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+	RelationPtr indexDescs = resultRelInfo->ri_IndexRelationDescs;
+	bool		all_indexes;
 
-		if (bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber,
-						  updatedCols) ||
-			bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber,
-						  extraUpdatedCols))
-		{
-			/* Changed key column -- don't hint for this index */
-			indexInfo->ii_IndexUnchanged = false;
-			return false;
-		}
-	}
-
-	/*
-	 * When we get this far and index has no expressions, return true so that
-	 * index_insert() call will go on to pass 'indexUnchanged' = true hint.
-	 *
-	 * The _absence_ of an indexed key attribute that overlaps with updated
-	 * attributes (in addition to the total absence of indexed expressions)
-	 * shows that the index as a whole is logically unchanged by UPDATE.
-	 */
-	if (!hasexpression)
-	{
-		indexInfo->ii_IndexUnchanged = true;
-		return true;
-	}
+	if (numIndices == 0)
+		return;
 
 	/*
-	 * Need to pass only one bms to expression_tree_walker helper function.
-	 * Avoid allocating memory in common case where there are no extra cols.
+	 * A whole-row entry in modified_idx_attrs means the table AM stored an
+	 * independent new version (e.g. at a new TID), so every index needs a
+	 * fresh entry regardless of which attributes changed.
 	 */
-	if (!extraUpdatedCols)
-		allUpdatedCols = updatedCols;
-	else
-		allUpdatedCols = bms_union(updatedCols, extraUpdatedCols);
+	all_indexes = bms_is_member(TableTupleUpdateAllIndexes, modified_idx_attrs);
 
-	/*
-	 * We have to work slightly harder in the event of indexed expressions,
-	 * but the principle is the same as before: try to find columns (Vars,
-	 * actually) that overlap with known-updated columns.
-	 *
-	 * If we find any matching Vars, don't pass hint for index.  Otherwise
-	 * pass hint.
-	 */
-	idxExprs = RelationGetIndexExpressions(indexRelation);
-	hasexpression = index_expression_changed_walker((Node *) idxExprs,
-													allUpdatedCols);
-	list_free(idxExprs);
-	if (extraUpdatedCols)
-		bms_free(allUpdatedCols);
-
-	if (hasexpression)
+	for (int i = 0; i < numIndices; i++)
 	{
-		indexInfo->ii_IndexUnchanged = false;
-		return false;
-	}
+		IndexInfo  *indexInfo = indexInfoArray[i];
+		Relation	indexDesc = indexDescs[i];
+		Bitmapset  *indexedattrs;
+		bool		keychanged;
 
-	/*
-	 * Deliberately don't consider index predicates.  We should even give the
-	 * hint when result rel's "updated tuple" has no corresponding index
-	 * tuple, which is possible with a partial index (provided the usual
-	 * conditions are met).
-	 */
-	indexInfo->ii_IndexUnchanged = true;
-	return true;
-}
-
-/*
- * Indexed expression helper for index_unchanged_by_update().
- *
- * Returns true when Var that appears within allUpdatedCols located.
- */
-static bool
-index_expression_changed_walker(Node *node, Bitmapset *allUpdatedCols)
-{
-	if (node == NULL)
-		return false;
+		if (indexDesc == NULL)
+			continue;
 
-	if (IsA(node, Var))
-	{
-		Var		   *var = (Var *) node;
+		/*
+		 * Skip decision (wide).  The index needs a new entry if the AM stored
+		 * an independent version, or if any attribute it references -- key,
+		 * INCLUDE, expression, or partial-predicate column -- changed.
+		 * RelationGetIndexedAttrs() covers all of those.  (An UPDATE that
+		 * touches an expression-index attribute never reaches the HOT-indexed
+		 * path: HeapUpdateHotAllowable disqualifies it, pending
+		 * expression-aware maintenance.)
+		 */
+		indexedattrs = RelationGetIndexedAttrs(indexDesc);
+		indexInfo->ii_IndexNeedsUpdate =
+			all_indexes || bms_overlap(indexedattrs, modified_idx_attrs);
+		bms_free(indexedattrs);
 
-		if (bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
-						  allUpdatedCols))
+		/*
+		 * aminsert hint (narrow).  ii_IndexUnchanged feeds nbtree
+		 * deduplication / bottom-up deletion heuristics and, per the
+		 * historical rule, counts only key columns: a change to an INCLUDE
+		 * column or to a partial-index predicate column does not disqualify
+		 * the hint.  An expression key column is treated conservatively as
+		 * possibly changed.
+		 */
+		keychanged = false;
+		for (int k = 0; k < indexInfo->ii_NumIndexKeyAttrs; k++)
 		{
-			/* Var was updated -- indicates that we should not hint */
-			return true;
-		}
+			AttrNumber	keycol = indexInfo->ii_IndexAttrNumbers[k];
 
-		/* Still haven't found a reason to not pass the hint */
-		return false;
+			if (keycol == 0)	/* expression key: assume it may have changed */
+			{
+				keychanged = true;
+				break;
+			}
+			if (bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber,
+							  modified_idx_attrs))
+			{
+				keychanged = true;
+				break;
+			}
+		}
+		indexInfo->ii_IndexUnchanged = !keychanged;
 	}
-
-	return expression_tree_walker(node, index_expression_changed_walker,
-								  allUpdatedCols);
 }
 
 /*
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 6262f71bd93..7b5bbdbfd7a 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -217,6 +217,18 @@ retry:
 	/* Try to find the tuple */
 	while (index_getnext_slot(scan, ForwardScanDirection, outslot))
 	{
+		/*
+		 * A HOT-indexed update can leave a stale index leaf: an entry whose
+		 * key is a pre-update value but whose TID chain-resolves to a live
+		 * tuple now carrying a different key.  Such a tuple is not the
+		 * replica-identity match we are looking for (and the PK/RI fast path
+		 * below skips the equality recheck that would otherwise catch it), so
+		 * drop it -- exactly as IndexScan/IndexOnlyScan do.  The fresh leaf
+		 * for the current key, if any, is returned by a later iteration.
+		 */
+		if (scan->xs_hot_indexed_stale)
+			continue;
+
 		/*
 		 * Avoid expensive equality check if the index is primary key or
 		 * replica identity index.
@@ -678,6 +690,10 @@ RelationFindDeletedTupleInfoByIndex(Relation rel, Oid idxoid,
 	/* Try to find the tuple */
 	while (index_getnext_slot(scan, ForwardScanDirection, scanslot))
 	{
+		/* Skip stale HOT-indexed leaves (see RelationFindReplTupleByIndex). */
+		if (scan->xs_hot_indexed_stale)
+			continue;
+
 		/*
 		 * Avoid expensive equality check if the index is primary key or
 		 * replica identity index.
@@ -911,7 +927,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 	bool		skip_tuple = false;
 	Relation	rel = resultRelInfo->ri_RelationDesc;
 	ItemPointer tid = &(searchslot->tts_tid);
-	Bitmapset  *modified_idx_attrs;
+	Bitmapset  *modified_idx_attrs = NULL;
 
 	/*
 	 * We support only non-system tables, with
@@ -934,7 +950,6 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 	if (!skip_tuple)
 	{
 		List	   *recheckIndexes = NIL;
-		TU_UpdateIndexes update_indexes;
 		List	   *conflictindexes;
 		bool		conflict = false;
 
@@ -953,27 +968,34 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 		modified_idx_attrs = ExecUpdateModifiedIdxAttrs(resultRelInfo,
 														searchslot, slot);
 
+		Assert(!bms_is_member(TableTupleUpdateAllIndexes, modified_idx_attrs));
 		simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
-								  modified_idx_attrs, &update_indexes);
-		bms_free(modified_idx_attrs);
-
+								  &modified_idx_attrs);
 
 		conflictindexes = resultRelInfo->ri_onConflictArbiterIndexes;
 
-		if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None))
+		if (resultRelInfo->ri_NumIndices > 0 &&
+			!bms_is_empty(modified_idx_attrs))
 		{
+			bool		all_indexes =
+				bms_is_member(TableTupleUpdateAllIndexes, modified_idx_attrs);
 			uint32		flags = EIIT_IS_UPDATE;
 
 			if (conflictindexes != NIL)
 				flags |= EIIT_NO_DUPE_ERROR;
-			if (update_indexes == TU_Summarizing)
-				flags |= EIIT_ONLY_SUMMARIZING;
+			if (!all_indexes)
+				flags |= EIIT_IS_HOT_INDEXED;
+
+			ExecSetIndexUnchanged(resultRelInfo, modified_idx_attrs);
+
 			recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
 												   estate, flags,
 												   slot, conflictindexes,
 												   &conflict);
 		}
 
+		bms_free(modified_idx_attrs);
+
 		/*
 		 * Refer to the comments above the call to CheckAndReportConflict() in
 		 * ExecSimpleRelationInsert to understand why this check is done at
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index d52012e8a69..fd0247eba37 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -31,6 +31,7 @@
 #include "postgres.h"
 
 #include "access/genam.h"
+#include "access/nbtree.h"
 #include "access/relscan.h"
 #include "access/tableam.h"
 #include "access/tupdesc.h"
@@ -104,6 +105,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
 
 		/* Set it up for index-only scan */
 		node->ioss_ScanDesc->xs_want_itup = true;
+		node->ioss_ScanDesc->xs_index_only = true;
 		node->ioss_VMBuffer = InvalidBuffer;
 
 		/*
@@ -172,6 +174,25 @@ IndexOnlyNext(IndexOnlyScanState *node)
 			if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
 				continue;		/* no visible tuple, try next index entry */
 
+			/*
+			 * HOT-indexed stale entry: if the chain walk to reach this tuple
+			 * crossed a hot-indexed hop that changed an attribute this index
+			 * covers, the leaf we arrived through is stale.  For IOS we serve
+			 * values out of xs_itup, so a stale leaf would surface the wrong
+			 * values; drop it.  The fresh entry for the new value returns the
+			 * row with correct values via its own path.  Prune keeps any page
+			 * that can carry such a stale leaf -- one with a redirect to a
+			 * live HEAP_INDEXED_UPDATED tuple -- out of the visibility map
+			 * (see heap_prune_record_redirect), so an index-only scan always
+			 * reaches this heap fetch when staleness could apply.
+			 */
+			if (scandesc->xs_hot_indexed_stale)
+			{
+				InstrCountFiltered2(node, 1);
+				ExecClearTuple(node->ioss_TableSlot);
+				continue;
+			}
+
 			ExecClearTuple(node->ioss_TableSlot);
 
 			/*
@@ -229,6 +250,16 @@ IndexOnlyNext(IndexOnlyScanState *node)
 			}
 		}
 
+		/*
+		 * No HOT-indexed staleness check is needed on the VM-all-visible path
+		 * (where we skipped the heap fetch).  Prune keeps any page that could
+		 * carry a stale leaf -- one with a redirect to a live
+		 * HEAP_INDEXED_UPDATED tuple -- out of the visibility map, so an
+		 * all-visible entry never crossed a HOT/SIU hop.  (index_getnext_tid
+		 * also resets xs_hot_indexed_stale per entry, and only the heap fetch
+		 * in index_fetch_heap ever sets it, so it cannot be set here anyway.)
+		 */
+
 		/*
 		 * We don't currently support rechecking ORDER BY distances.  (In
 		 * principle, if the index can support retrieval of the originally
@@ -775,6 +806,7 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
 								 ScanRelIsReadOnly(&node->ss) ?
 								 SO_HINT_REL_READ_ONLY : SO_NONE);
 	node->ioss_ScanDesc->xs_want_itup = true;
+	node->ioss_ScanDesc->xs_index_only = true;
 	node->ioss_VMBuffer = InvalidBuffer;
 
 	/*
@@ -825,6 +857,7 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node,
 								 ScanRelIsReadOnly(&node->ss) ?
 								 SO_HINT_REL_READ_ONLY : SO_NONE);
 	node->ioss_ScanDesc->xs_want_itup = true;
+	node->ioss_ScanDesc->xs_index_only = true;
 
 	/*
 	 * If no run-time keys to calculate or they are ready, go ahead and pass
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 39f6691ee35..2329a717118 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -32,6 +32,7 @@
 #include "access/nbtree.h"
 #include "access/relscan.h"
 #include "access/tableam.h"
+#include "catalog/index.h"
 #include "catalog/pg_am.h"
 #include "executor/executor.h"
 #include "executor/instrument.h"
@@ -151,6 +152,20 @@ IndexNext(IndexScanState *node)
 			}
 		}
 
+		/*
+		 * HOT-indexed stale entry: the chain we walked to reach this tuple
+		 * crossed a hot-indexed hop that changed an attribute this index
+		 * covers, so the leaf entry we arrived through is stale.  Drop it;
+		 * the fresh entry inserted for the new value returns the row through
+		 * its own path.  Staleness was decided by the heap AM via per-hop
+		 * modified-attrs bitmaps (see heap_hot_search_buffer).
+		 */
+		if (scandesc->xs_hot_indexed_stale)
+		{
+			InstrCountFiltered2(node, 1);
+			continue;
+		}
+
 		return slot;
 	}
 
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 1e79307d33c..3243be33a9b 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -130,7 +130,14 @@ typedef struct ModifyTableContext
 typedef struct UpdateContext
 {
 	bool		crossPartUpdate;	/* was it a cross-partition update? */
-	TU_UpdateIndexes updateIndexes; /* Which index updates are required? */
+
+	/*
+	 * Set of indexed attributes the UPDATE changed (in/out for the table AM's
+	 * update callback).  Populated by ExecUpdateAct and consumed by
+	 * ExecUpdateEpilogue; the AM adds the whole-row attribute
+	 * (TableTupleUpdateAllIndexes) when every index needs a fresh entry.
+	 */
+	Bitmapset  *modified_attrs;
 
 	/*
 	 * Lock mode to acquire on the latest tuple version before performing
@@ -238,25 +245,23 @@ ExecUpdateModifiedIdxAttrs(ResultRelInfo *resultRelInfo,
 		return NULL;
 
 	/*
-	 * Get the set of all attributes across all indexes for this relation from
-	 * the relcache, it returns us a copy of the bitmap so we can modify it.
+	 * Determine which indexed attributes actually changed value by comparing
+	 * the old and new tuples attribute-by-attribute over the relation's full
+	 * indexed-attribute set.  We deliberately do NOT try to narrow the work
+	 * using the SQL UPDATE's target list (ExecGetAllUpdatedCols): that list
+	 * does not capture indexed columns mutated outside the SET clause, such
+	 * as a column rewritten by a BEFORE/INSTEAD-OF trigger via
+	 * heap_modify_tuple (see tsvector_update_trigger() in tsearch.sql), the
+	 * implicit temporal range column of a FOR PORTION OF update, or the
+	 * pre-built tuples applied by REPACK (CONCURRENTLY) and logical
+	 * replication through a synthetic ResultRelInfo.  Comparing the actual
+	 * tuple values is always correct.
 	 *
-	 * Note: We intentionally scan all indexed columns when looking for
-	 * changes rather than reduce that set by intersecting it with
-	 * ExecGetAllUpdatedCols().  Desipte the name it provides the set of
-	 * targeted attributes in the SQL used for the UPDATE and any triggers,
-	 * but that doesn't include any attributes updated using
-	 * heap_modifiy_tuple(). There is one test in tsearch.sql that does just
-	 * that, modifies an indexed attribute that isn't specified in the SQL and
-	 * so isn't present in that bitmapset.
+	 * RelationGetIndexAttrBitmap returns a copy we are free to mutate;
+	 * ExecCompareSlotAttrs deletes the attributes that did not change and
+	 * returns the surviving "modified indexed attributes" set.
 	 */
 	attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_INDEXED);
-
-	/*
-	 * When there are indexed attributes mentioned in the UPDATE then we need
-	 * to find the subset that changed value.  That's the
-	 * "modified_idx_attrs".
-	 */
 	attrs = ExecCompareSlotAttrs(attrs, tupdesc, old_tts, new_tts);
 
 	return attrs;
@@ -2513,8 +2518,8 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 	bool		partition_constraint_failed;
 	TM_Result	result;
 
-	/* The set of modified indexed attributes that trigger new index entries */
-	Bitmapset  *modified_idx_attrs = NULL;
+	/* Reset any state left over from a previous call */
+	updateCxt->modified_attrs = NULL;
 
 	updateCxt->crossPartUpdate = false;
 
@@ -2638,7 +2643,8 @@ lreplace:
 	 * we will overlook attributes directly modified by heap_modify_tuple()
 	 * which are not known to ExecGetUpdatedCols().
 	 */
-	modified_idx_attrs = ExecUpdateModifiedIdxAttrs(resultRelInfo, oldSlot, slot);
+	updateCxt->modified_attrs =
+		ExecUpdateModifiedIdxAttrs(resultRelInfo, oldSlot, slot);
 
 	/*
 	 * Call into the table AM to update the heap tuple.
@@ -2649,6 +2655,8 @@ lreplace:
 	 * for referential integrity updates in transaction-snapshot mode
 	 * transactions.
 	 */
+	Assert(!bms_is_member(TableTupleUpdateAllIndexes,
+						  updateCxt->modified_attrs));
 	result = table_tuple_update(resultRelationDesc, tupleid, slot,
 								estate->es_output_cid,
 								0,
@@ -2656,8 +2664,7 @@ lreplace:
 								estate->es_crosscheck_snapshot,
 								true /* wait for commit */ ,
 								&context->tmfd, &updateCxt->lockmode,
-								modified_idx_attrs,
-								&updateCxt->updateIndexes);
+								&updateCxt->modified_attrs);
 
 	return result;
 }
@@ -2678,14 +2685,26 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
 	List	   *recheckIndexes = NIL;
 
 	/* insert index entries for tuple if necessary */
-	if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None))
+	if (resultRelInfo->ri_NumIndices > 0 &&
+		!bms_is_empty(updateCxt->modified_attrs))
 	{
-		uint32		flags = EIIT_IS_UPDATE;
+		bool		all_indexes =
+			bms_is_member(TableTupleUpdateAllIndexes,
+						  updateCxt->modified_attrs);
+
+		/*
+		 * Populate per-index ii_IndexUnchanged before inserting.  When the AM
+		 * stored an independent new version (whole-row attribute present)
+		 * every index needs a fresh entry; for a HOT update only those whose
+		 * attributes overlap the modified set do.
+		 */
+		ExecSetIndexUnchanged(resultRelInfo, updateCxt->modified_attrs);
 
-		if (updateCxt->updateIndexes == TU_Summarizing)
-			flags |= EIIT_ONLY_SUMMARIZING;
 		recheckIndexes = ExecInsertIndexTuples(resultRelInfo, context->estate,
-											   flags, slot, NIL,
+											   EIIT_IS_UPDATE |
+											   (all_indexes ?
+												0 : EIIT_IS_HOT_INDEXED),
+											   slot, NIL,
 											   NULL);
 	}
 
diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c
index 40b09958ac2..f050c088d28 100644
--- a/src/backend/nodes/makefuncs.c
+++ b/src/backend/nodes/makefuncs.c
@@ -845,8 +845,6 @@ makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions,
 	n->ii_Unique = unique;
 	n->ii_NullsNotDistinct = nulls_not_distinct;
 	n->ii_ReadyForInserts = isready;
-	n->ii_CheckedUnchanged = false;
-	n->ii_IndexUnchanged = false;
 	n->ii_Concurrent = concurrent;
 	n->ii_Summarizing = summarizing;
 	n->ii_WithoutOverlaps = withoutoverlaps;
diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c
index 04f2eb21d0b..805e3489956 100644
--- a/src/backend/utils/activity/pgstat_relation.c
+++ b/src/backend/utils/activity/pgstat_relation.c
@@ -384,11 +384,17 @@ pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
 
 /*
  * count a tuple update
+ *
+ * hot      -- the update was a heap-only tuple (classic HOT or HOT-indexed)
+ * hot_indexed -- the update was a HOT-indexed update, a subcase of
+ *                hot=true; hot_indexed implies hot
+ * newpage  -- the new tuple went to a different buffer than the old one
  */
 void
-pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
+pgstat_count_heap_update(Relation rel, bool hot, bool hot_indexed, bool newpage)
 {
 	Assert(!(hot && newpage));
+	Assert(!(hot_indexed && !hot));
 
 	if (pgstat_should_count_relation(rel))
 	{
@@ -398,11 +404,17 @@ pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
 		pgstat_info->trans->tuples_updated++;
 
 		/*
-		 * tuples_hot_updated and tuples_newpage_updated counters are
-		 * nontransactional, so just advance them
+		 * tuples_hot_updated, tuples_hot_indexed_updated, and
+		 * tuples_newpage_updated counters are nontransactional, so just
+		 * advance them.  tuples_hot_indexed_updated is counted in *addition*
+		 * to tuples_hot_updated: every hot-indexed update is also a HOT one.
 		 */
 		if (hot)
+		{
 			pgstat_info->counts.tuples_hot_updated++;
+			if (hot_indexed)
+				pgstat_info->counts.tuples_hot_indexed_updated++;
+		}
 		else if (newpage)
 			pgstat_info->counts.tuples_newpage_updated++;
 	}
@@ -854,7 +866,10 @@ pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait)
 	tabentry->tuples_updated += lstats->counts.tuples_updated;
 	tabentry->tuples_deleted += lstats->counts.tuples_deleted;
 	tabentry->tuples_hot_updated += lstats->counts.tuples_hot_updated;
+	tabentry->tuples_hot_indexed_updated += lstats->counts.tuples_hot_indexed_updated;
 	tabentry->tuples_newpage_updated += lstats->counts.tuples_newpage_updated;
+	tabentry->tuples_hot_indexed_upd_skipped += lstats->counts.tuples_hot_indexed_upd_skipped;
+	tabentry->tuples_hot_indexed_upd_matched += lstats->counts.tuples_hot_indexed_upd_matched;
 
 	/*
 	 * If table was truncated/dropped, first reset the live/dead counters.
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 055f757107f..7d4db32fd38 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -1586,6 +1586,7 @@ RelationInitIndexAccessInfo(Relation relation)
 	 */
 	relation->rd_indexprs = NIL;
 	relation->rd_indpred = NIL;
+	relation->rd_indattr = NULL;
 	relation->rd_exclops = NULL;
 	relation->rd_exclprocs = NULL;
 	relation->rd_exclstrats = NULL;
@@ -2484,6 +2485,7 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
 	bms_free(relation->rd_idattr);
 	bms_free(relation->rd_indexedattr);
 	bms_free(relation->rd_summarizedattr);
+	bms_free(relation->rd_exprindexattr);
 	if (relation->rd_pubdesc)
 		pfree(relation->rd_pubdesc);
 	if (relation->rd_options)
@@ -5275,6 +5277,108 @@ RelationGetIndexPredicate(Relation relation)
 	return result;
 }
 
+/*
+ * RelationGetIndexedAttrs -- palloc'd Bitmapset of heap attrs this index
+ * references.
+ *
+ * Includes attributes used as simple key columns, INCLUDE columns, inside
+ * expression columns, and inside the partial-index predicate.  Attribute
+ * numbers use the FirstLowInvalidHeapAttributeNumber offset convention so
+ * that system attributes are representable alongside user attributes.
+ *
+ * The function builds up the bitmap from:
+ *   - rd_index->indkey           (keys + INCLUDE)
+ *   - RelationGetIndexExpressions (parsed expression trees, already cached)
+ *   - RelationGetIndexPredicate   (parsed predicate tree, already cached)
+ * and caches a copy in rd_indattr, which lives in rd_indexcxt.  During very
+ * early bootstrap, when rd_indextuple is not populated yet, only the indkey
+ * columns are reported (expression and predicate columns are omitted) and
+ * nothing is cached.
+ *
+ * The returned Bitmapset is allocated in the caller's current memory
+ * context; the caller owns it and must bms_free when done.  We never hand
+ * out a borrowed pointer to the cached copy because relcache invalidation
+ * can rebuild rd_indexcxt in place even while a refcount is held.
+ *
+ * Caller must hold an open lock on the index relation.
+ */
+Bitmapset *
+RelationGetIndexedAttrs(Relation indexRel)
+{
+	Bitmapset  *attrs = NULL;
+	Form_pg_index indexStruct;
+	List	   *indexprs;
+	List	   *indpred;
+	MemoryContext oldcxt;
+
+	Assert(indexRel->rd_rel->relkind == RELKIND_INDEX ||
+		   indexRel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX);
+
+	/* Fast path: return a copy of the cached bitmap. */
+	if (indexRel->rd_indattr != NULL)
+		return bms_copy(indexRel->rd_indattr);
+
+	indexStruct = indexRel->rd_index;
+
+	/*
+	 * Key columns and INCLUDE (covering) columns.  INCLUDE columns must be
+	 * counted: their values are stored in the index leaf and served by
+	 * index-only scans, so an update that changes an INCLUDE column must
+	 * insert a fresh index entry (or be disqualified from staying
+	 * HOT-indexed) exactly as for a key column.  This matches the heap-level
+	 * RelationGetIndexAttrBitmap(..., INDEX_ATTR_BITMAP_INDEXED), which also
+	 * unions all indnatts.  Expression and partial-predicate columns are
+	 * added below.
+	 *
+	 * This step reads only rd_index, so it runs ahead of the bootstrap check
+	 * and is shared by the fallback and the full path.
+	 */
+	for (int i = 0; i < indexStruct->indnatts; i++)
+	{
+		AttrNumber	attrnum = indexStruct->indkey.values[i];
+
+		/* attnum 0 means "expression"; those attrs are picked up below. */
+		if (attrnum != 0)
+			attrs = bms_add_member(attrs,
+								   attrnum - FirstLowInvalidHeapAttributeNumber);
+	}
+
+	/*
+	 * During very early bootstrap rd_indextuple may not be populated yet. In
+	 * that case we fall back to just the key columns without caching: the
+	 * expression and predicate lookups below are skipped, and rd_indattr is
+	 * left unset so that a later call recomputes the set instead of reusing
+	 * this partial one.
+	 */
+	if (indexRel->rd_indextuple == NULL)
+		return attrs;
+
+	/* Expression columns (via already-parsed tree, reusing relcache). */
+	indexprs = RelationGetIndexExpressions(indexRel);
+	if (indexprs != NIL)
+		pull_varattnos((Node *) indexprs, 1, &attrs);
+
+	/* Partial-index predicate columns. */
+	indpred = RelationGetIndexPredicate(indexRel);
+	if (indpred != NIL)
+		pull_varattnos((Node *) indpred, 1, &attrs);
+
+	/*
+	 * Cache a copy inside rd_indexcxt so subsequent calls are cheap.  The
+	 * cached bitmap is freed along with rd_indexcxt on relcache rebuild, so
+	 * it's safe to stash here.  Without an rd_indexcxt there is nowhere to
+	 * keep it, so the set is simply recomputed on the next call.
+	 */
+	if (indexRel->rd_indexcxt != NULL)
+	{
+		oldcxt = MemoryContextSwitchTo(indexRel->rd_indexcxt);
+		indexRel->rd_indattr = bms_copy(attrs);
+		MemoryContextSwitchTo(oldcxt);
+	}
+
+	return attrs;
+}
+
 /*
  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
  *
@@ -5313,6 +5417,7 @@ Bitmapset *
 RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
 {
 	Bitmapset  *uindexattrs;	/* columns in unique indexes */
+	Bitmapset  *exprindexattrs; /* columns referenced by expression indexes */
 	Bitmapset  *pkindexattrs;	/* columns in the primary index */
 	Bitmapset  *idindexattrs;	/* columns in the replica identity */
 	Bitmapset  *indexedattrs;	/* columns referenced by indexes */
@@ -5339,6 +5444,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
 				return bms_copy(relation->rd_indexedattr);
 			case INDEX_ATTR_BITMAP_SUMMARIZED:
 				return bms_copy(relation->rd_summarizedattr);
+			case INDEX_ATTR_BITMAP_EXPRESSION:
+				return bms_copy(relation->rd_exprindexattr);
 			default:
 				elog(ERROR, "unknown attrKind %u", attrKind);
 		}
@@ -5383,6 +5490,7 @@ restart:
 	idindexattrs = NULL;
 	indexedattrs = NULL;
 	summarizedattrs = NULL;
+	exprindexattrs = NULL;
 	foreach(l, indexoidlist)
 	{
 		Oid			indexOid = lfirst_oid(l);
@@ -5487,6 +5595,28 @@ restart:
 		/* Collect all attributes in the index predicate, too */
 		pull_varattnos(indexPredicate, 1, attrs);
 
+		/*
+		 * If this index evaluates an expression, record every heap attribute
+		 * it references (key columns, expression vars, predicate vars) in
+		 * exprindexattrs.  HeapUpdateHotAllowable() disqualifies the
+		 * HOT-indexed path for an UPDATE that touches one of these, because
+		 * expression-aware selective index maintenance is not implemented
+		 * yet.
+		 */
+		if (indexExpressions != NULL)
+		{
+			for (i = 0; i < indexDesc->rd_index->indnatts; i++)
+			{
+				int			attrnum = indexDesc->rd_index->indkey.values[i];
+
+				if (attrnum != 0)
+					exprindexattrs = bms_add_member(exprindexattrs,
+													attrnum - FirstLowInvalidHeapAttributeNumber);
+			}
+			pull_varattnos(indexExpressions, 1, &exprindexattrs);
+			pull_varattnos(indexPredicate, 1, &exprindexattrs);
+		}
+
 		index_close(indexDesc, AccessShareLock);
 	}
 
@@ -5515,14 +5645,27 @@ restart:
 		bms_free(idindexattrs);
 		bms_free(indexedattrs);
 		bms_free(summarizedattrs);
+		bms_free(exprindexattrs);
 
 		goto restart;
 	}
 
 	/*
-	 * Record what attributes are only referenced by summarizing indexes. Then
-	 * add that into the other indexed attributes to track all referenced
-	 * attributes.
+	 * Record which attributes are referenced only by summarizing indexes, so
+	 * INDEX_ATTR_BITMAP_SUMMARIZED reports columns whose sole indexes are
+	 * summarizing ones, then fold those columns into indexedattrs as well.
+	 *
+	 * INDEX_ATTR_BITMAP_INDEXED must include summarizing-index columns for
+	 * the HOT-indexed write path: it compares the old and new tuples over
+	 * this bitmap to build the set of modified indexed attributes, and only
+	 * maintains indexes when that set is non-empty (or the update is
+	 * non-HOT).  A change to a column indexed only by a summarizing index
+	 * must therefore appear in the bitmap so the summarizing index gets its
+	 * block summary refreshed.  HeapUpdateHotAllowable's all_summarizing
+	 * check still keeps such an update on the classic-HOT path, because the
+	 * modified attributes are then a subset of INDEX_ATTR_BITMAP_SUMMARIZED
+	 * (the summarizing-only columns), and the summarizing index inserts
+	 * unconditionally via its ii_Summarizing flag.
 	 */
 	summarizedattrs = bms_del_members(summarizedattrs, indexedattrs);
 	indexedattrs = bms_add_members(indexedattrs, summarizedattrs);
@@ -5539,6 +5682,8 @@ restart:
 	relation->rd_indexedattr = NULL;
 	bms_free(relation->rd_summarizedattr);
 	relation->rd_summarizedattr = NULL;
+	bms_free(relation->rd_exprindexattr);
+	relation->rd_exprindexattr = NULL;
 
 	/*
 	 * Now save copies of the bitmaps in the relcache entry.  We intentionally
@@ -5553,6 +5698,7 @@ restart:
 	relation->rd_idattr = bms_copy(idindexattrs);
 	relation->rd_indexedattr = bms_copy(indexedattrs);
 	relation->rd_summarizedattr = bms_copy(summarizedattrs);
+	relation->rd_exprindexattr = bms_copy(exprindexattrs);
 	relation->rd_attrsvalid = true;
 	MemoryContextSwitchTo(oldcxt);
 
@@ -5569,6 +5715,72 @@ restart:
 			return indexedattrs;
 		case INDEX_ATTR_BITMAP_SUMMARIZED:
 			return summarizedattrs;
+		case INDEX_ATTR_BITMAP_EXPRESSION:
+			return exprindexattrs;
+		default:
+			elog(ERROR, "unknown attrKind %u", attrKind);
+			return NULL;
+	}
+}
+
+/*
+ * RelationGetIndexAttrBitmapNoCopy -- borrowing variant of
+ *		RelationGetIndexAttrBitmap
+ *
+ * Returns a pointer to the relcache-owned bitmap for the given attrKind
+ * without making a defensive copy.  This is a hot-path optimization for
+ * read-only callers that perform set operations like bms_overlap,
+ * bms_is_subset, bms_equal, or bms_num_members and never mutate the
+ * returned bitmap.  The result is conceptually `const Bitmapset *`; callers
+ * must not pass it to anything that could free or modify the underlying
+ * memory (e.g., bms_add_member, bms_int_members, bms_free).
+ *
+ * Lifetime: the pointer is valid only until the next event that could
+ * trigger a relcache invalidation on `relation`.  Callers must not invoke
+ * any code that opens a relation, runs catalog lookups, or otherwise
+ * accepts invalidation messages between the fetch and the last use.
+ *
+ * For the common case the relcache entry's attribute bitmaps are already
+ * computed (rd_attrsvalid is true).  When they aren't, we go through
+ * RelationGetIndexAttrBitmap to populate the cache (which costs one
+ * throwaway bms_copy on first use) and then return the cached pointer on
+ * the second pass.  The first-use path is rare and not performance
+ * critical, so the simplicity is preferred over open-coding a populate-only
+ * variant.
+ */
+const Bitmapset *
+RelationGetIndexAttrBitmapNoCopy(Relation relation, IndexAttrBitmapKind attrKind)
+{
+	if (!relation->rd_attrsvalid)
+	{
+		Bitmapset  *populated;
+
+		/* Populate rd_*attr fields; discard the returned copy. */
+		populated = RelationGetIndexAttrBitmap(relation, attrKind);
+		bms_free(populated);
+
+		/*
+		 * If the relation has no indexes, RelationGetIndexAttrBitmap returns
+		 * NULL without setting rd_attrsvalid.  Mirror that here.
+		 */
+		if (!relation->rd_attrsvalid)
+			return NULL;
+	}
+
+	switch (attrKind)
+	{
+		case INDEX_ATTR_BITMAP_KEY:
+			return relation->rd_keyattr;
+		case INDEX_ATTR_BITMAP_PRIMARY_KEY:
+			return relation->rd_pkattr;
+		case INDEX_ATTR_BITMAP_IDENTITY_KEY:
+			return relation->rd_idattr;
+		case INDEX_ATTR_BITMAP_INDEXED:
+			return relation->rd_indexedattr;
+		case INDEX_ATTR_BITMAP_SUMMARIZED:
+			return relation->rd_summarizedattr;
+		case INDEX_ATTR_BITMAP_EXPRESSION:
+			return relation->rd_exprindexattr;
 		default:
 			elog(ERROR, "unknown attrKind %u", attrKind);
 			return NULL;
@@ -6508,6 +6720,7 @@ load_relcache_init_file(bool shared)
 		rel->rd_partcheckcxt = NULL;
 		rel->rd_indexprs = NIL;
 		rel->rd_indpred = NIL;
+		rel->rd_indattr = NULL;
 		rel->rd_exclops = NULL;
 		rel->rd_exclprocs = NULL;
 		rel->rd_exclstrats = NULL;
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index 79240333530..7aec083b8ee 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -14,6 +14,7 @@
 
 #include "access/cmptype.h"
 #include "access/genam.h"
+#include "access/itup.h"
 #include "access/stratnum.h"
 #include "nodes/nodes.h"
 #include "nodes/pg_list.h"
@@ -29,7 +30,6 @@ typedef struct IndexPath IndexPath;
 /* Likewise, this file shouldn't depend on execnodes.h. */
 typedef struct IndexInfo IndexInfo;
 
-
 /*
  * Properties for amproperty API.  This list covers properties known to the
  * core code, but an index AM can define its own properties, by matching the
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 2dbfad92113..1cdf891055c 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -384,11 +384,45 @@ extern TM_Result heap_delete(Relation relation, const ItemPointerData *tid,
 							 bool wait, TM_FailureData *tmfd);
 extern void heap_finish_speculative(Relation relation, const ItemPointerData *tid);
 extern void heap_abort_speculative(Relation relation, const ItemPointerData *tid);
+
+/*
+ * HeapUpdateIndexMode --
+ *	Three-valued classification returned by HeapUpdateHotAllowable() that
+ *	tells heap_update() whether a HOT update is permitted for this tuple and,
+ *	if so, whether the indexes may be maintained selectively.
+ *
+ *	HEAP_UPDATE_ALL_INDEXES
+ *		HOT is not allowed; the new tuple must go on its own TID and every
+ *		index receives a fresh entry.  This is the classic pre-HOT-indexed
+ *		behavior for updates that modify a non-summarizing indexed attribute.
+ *
+ *	HEAP_HEAP_ONLY_UPDATE
+ *		Classic HOT update: only summarizing indexed attributes changed, if
+ *		any, so no non-summarizing index needs a new entry.
+ *
+ *	HEAP_SELECTIVE_INDEX_UPDATE
+ *		HOT with selective index update: at least one non-summarizing index's
+ *		attribute changed, but the new tuple can still join the HOT chain on
+ *		the same page; only the indexes whose attributes changed receive a new
+ *		entry.  As for classic HOT, heap_update() still falls back to a
+ *		non-HOT update if the new tuple does not fit on the page.
+ *
+ *	Callers should spell the exact mode they care about; the numeric values
+ *	are not meaningful as an ordering.
+ */
+typedef enum HeapUpdateIndexMode
+{
+	HEAP_UPDATE_ALL_INDEXES = 0,	/* not HOT: every index gets an entry */
+	HEAP_HEAP_ONLY_UPDATE = 1,	/* classic heap-only (HOT) update */
+	HEAP_SELECTIVE_INDEX_UPDATE = 2,	/* HOT, changed indexes only */
+} HeapUpdateIndexMode;
+
 extern TM_Result heap_update(Relation relation, const ItemPointerData *otid,
 							 HeapTuple newtup, CommandId cid, uint32 options,
 							 Snapshot crosscheck, bool wait,
 							 TM_FailureData *tmfd, const LockTupleMode lockmode,
-							 const Bitmapset *modified_idx_attrs, const bool hot_allowed);
+							 const Bitmapset *modified_idx_attrs,
+							 HeapUpdateIndexMode hot_mode);
 extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
 								 CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
 								 bool follow_updates,
@@ -423,7 +457,7 @@ extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
 extern void simple_heap_insert(Relation relation, HeapTuple tup);
 extern void simple_heap_delete(Relation relation, const ItemPointerData *tid);
 extern void simple_heap_update(Relation relation, const ItemPointerData *otid,
-							   HeapTuple tup, TU_UpdateIndexes *update_indexes);
+							   HeapTuple tup, bool *update_all_indexes);
 
 extern TransactionId heap_index_delete_tuples(Relation rel,
 											  TM_IndexDeleteOp *delstate);
@@ -434,7 +468,10 @@ extern void heapam_index_fetch_reset(IndexFetchTableData *scan);
 extern void heapam_index_fetch_end(IndexFetchTableData *scan);
 extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
 								   Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
-								   bool *all_dead, bool first_call);
+								   bool *all_dead, bool first_call,
+								   bool *hot_indexed_recheck,
+								   uint8 *crossed_bitmap,
+								   bool *prefix_all_dead);
 extern bool heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
 									 ItemPointer tid, Snapshot snapshot,
 									 TupleTableSlot *slot, bool *heap_continue,
@@ -464,8 +501,9 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
 									  OffsetNumber *unused, int nunused);
 
 /* in heap/heapam.c */
-extern bool HeapUpdateHotAllowable(Relation relation, const Bitmapset *modified_idx_attrs,
-								   bool *summarized_only);
+
+extern HeapUpdateIndexMode HeapUpdateHotAllowable(Relation relation,
+												const Bitmapset *modified_idx_attrs);
 extern LockTupleMode HeapUpdateDetermineLockmode(Relation relation,
 												 const Bitmapset *modified_idx_attrs);
 
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 2ea06a67a63..fe4469178aa 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -134,6 +134,45 @@ typedef struct IndexFetchTableData
 	 * permitted.
 	 */
 	uint32		flags;
+
+	/*
+	 * Side channel for table AMs whose update chains can reach a different
+	 * set of index-key values than the arriving index entry recorded (heap's
+	 * HOT-selectively-updated chains).  Set true by the table AM when the
+	 * walk to the live tuple crossed a HOT/SIU hop after the entry's own
+	 * tuple, meaning the arriving entry's stored key may no longer match the
+	 * live tuple and the index-access layer must recheck it.  Left false when
+	 * no such hop was crossed (the entry is definitely current), and always
+	 * false for AMs without such chains.
+	 */
+	bool		xs_hot_indexed_recheck;
+
+	/*
+	 * Companion to xs_hot_indexed_recheck.  xs_hot_indexed_crossed is the
+	 * union of the per-hop modified-attrs bitmaps the walk crossed after the
+	 * entry's own tuple, over heap attribute numbers (bit attnum-1 for a
+	 * 1-based attnum).  The index-access layer tests it against the arriving
+	 * index's key columns to judge staleness without a key comparison: any
+	 * overlap means a crossed hop changed one of the index's inputs, so the
+	 * entry is stale.  The union is complete (every crossed live hop and
+	 * collapse-survivor stub contributes its bitmap, and collapse only
+	 * reclaims members subsumed by surviving hops), so disjointness reliably
+	 * means fresh.  It is NULL for AMs without such chains and is sized by
+	 * the table AM for the heap relation's column count.
+	 */
+	uint8	   *xs_hot_indexed_crossed;
+
+	/*
+	 * Set by the table AM when it returns a tuple: true iff every chain
+	 * member the walk skipped before reaching the returned (visible) tuple is
+	 * dead to all transactions (below the global xmin horizon).  Combined
+	 * with a stale verdict (the crossed-attribute bitmap overlapped the
+	 * index's key columns), this lets the index-access layer
+	 * kill the arriving leaf: no snapshot can reach a matching version
+	 * through it, so it is redundant.  AMs without such chains leave it
+	 * false.
+	 */
+	bool		xs_prefix_all_dead;
 } IndexFetchTableData;
 
 struct IndexScanInstrumentation;
@@ -154,6 +193,13 @@ typedef struct IndexScanDescData
 	struct ScanKeyData *keyData;	/* array of index qualifier descriptors */
 	struct ScanKeyData *orderByData;	/* array of ordering op descriptors */
 	bool		xs_want_itup;	/* caller requests index tuples */
+	bool		xs_index_only;	/* caller is an index-only scan that may
+								 * return tuples without fetching the heap;
+								 * AMs must retain leaf-page pins for such
+								 * scans (VM all-visible / TID-recycle race),
+								 * whereas a plain scan that sets xs_want_itup
+								 * only to inspect the index tuple still
+								 * fetches the heap and may drop pins */
 	bool		xs_temp_snap;	/* unregister snapshot at scan end? */
 
 	/* signaling to index AM about killing index tuples */
@@ -189,6 +235,20 @@ typedef struct IndexScanDescData
 
 	bool		xs_recheck;		/* T means scan keys must be rechecked */
 
+	/*
+	 * T means the index entry that reached xs_heaptid is stale: the HOT chain
+	 * walked to reach the tuple crossed a HOT-selectively-updated (HOT/SIU)
+	 * hop that changed an attribute this index covers, so the arriving
+	 * entry's stored key no longer matches the live tuple.  The executor
+	 * drops such a tuple; the row is re-supplied by the fresh entry inserted
+	 * for the new value.  Unlike xs_recheck (set by lossy AMs such as GiST
+	 * and GIN), this is computed by the index-access layer by testing the
+	 * heap AM's crossed-attribute bitmap (xs_hot_indexed_crossed) against
+	 * this index's key columns: any overlap means a crossed hop changed one
+	 * of the index's inputs, so the entry is stale.
+	 */
+	bool		xs_hot_indexed_stale;
+
 	/*
 	 * When fetching with an ordering operator, the values of the ORDER BY
 	 * expressions of the last returned tuple, according to the index.  If
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index a9778b3528d..e7bd9f3e6fc 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -19,6 +19,7 @@
 
 #include "access/relscan.h"
 #include "access/sdir.h"
+#include "access/sysattr.h"
 #include "access/xact.h"
 #include "executor/tuptable.h"
 #include "storage/read_stream.h"
@@ -28,6 +29,18 @@
 
 #define DEFAULT_TABLE_ACCESS_METHOD	"heap"
 
+/*
+ * Whole-row sentinel for the in/out modified-attributes set of
+ * table_tuple_update().  On input the caller supplies the indexed attributes
+ * whose values changed, never the sentinel.  A table AM that stored the new
+ * tuple as an independent version not reachable through the existing index
+ * entries (for heap, a non-HOT update) adds this whole-row attribute
+ * (attribute number 0, FirstLowInvalidHeapAttributeNumber convention) on
+ * output, signalling that every index needs a new entry.  Diffing real
+ * columns never yields attribute 0, so it is unambiguous as this sentinel.
+ */
+#define TableTupleUpdateAllIndexes	(0 - FirstLowInvalidHeapAttributeNumber)
+
 /* GUCs */
 extern PGDLLIMPORT char *default_table_access_method;
 extern PGDLLIMPORT bool synchronize_seqscans;
@@ -125,22 +138,6 @@ typedef enum TM_Result
 	TM_WouldBlock,
 } TM_Result;
 
-/*
- * Result codes for table_update(..., update_indexes*..).
- * Used to determine which indexes to update.
- */
-typedef enum TU_UpdateIndexes
-{
-	/* No indexed columns were updated (incl. TID addressing of tuple) */
-	TU_None,
-
-	/* A non-summarizing indexed column was updated, or the TID has changed */
-	TU_All,
-
-	/* Only summarized columns were updated, TID is unchanged */
-	TU_Summarizing,
-} TU_UpdateIndexes;
-
 /*
  * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail
  * because the target tuple is already outdated, they fill in this struct to
@@ -488,6 +485,13 @@ typedef struct TableAmRoutine
 	 * index_fetch_tuple iff it is guaranteed that no backend needs to see
 	 * that tuple. Index AMs can use that to avoid returning that tid in
 	 * future searches.
+	 *
+	 * If a tuple is returned and the table AM reached it by walking a HOT
+	 * chain that crossed a HOT-selectively-updated (HOT/SIU) hop after the
+	 * arriving entry's own tuple, it sets scan->xs_hot_indexed_recheck (see
+	 * struct IndexFetchTableData) to tell the index-access layer to recheck
+	 * the arriving leaf key against the live tuple.  AMs without such update
+	 * chains leave it false.
 	 */
 	bool		(*index_fetch_tuple) (struct IndexFetchTableData *scan,
 									  ItemPointer tid,
@@ -586,8 +590,7 @@ typedef struct TableAmRoutine
 								 bool wait,
 								 TM_FailureData *tmfd,
 								 LockTupleMode *lockmode,
-								 const Bitmapset *modified_idx_attrs,
-								 TU_UpdateIndexes *update_indexes);
+								 Bitmapset **modified_attrs);
 
 	/* see table_tuple_lock() for reference about parameters */
 	TM_Result	(*tuple_lock) (Relation rel,
@@ -1319,11 +1322,20 @@ table_index_fetch_tuple(struct IndexFetchTableData *scan,
  * returns whether there are table tuple items corresponding to an index
  * entry.  This likely is only useful to verify if there's a conflict in a
  * unique index.
+ *
+ * If keep_slot is non-NULL, on a positive result the function stores the
+ * fetched tuple into *keep_slot (which must be a valid slot of the
+ * relation's type) and returns with the slot populated; the caller is
+ * responsible for clearing the slot.  When keep_slot is NULL a temporary
+ * slot is created internally and dropped before return, matching the
+ * pre-existing behaviour.
  */
 extern bool table_index_fetch_tuple_check(Relation rel,
 										  ItemPointer tid,
 										  Snapshot snapshot,
-										  bool *all_dead);
+										  bool *all_dead,
+										  bool *hot_indexed_recheck_out,
+										  TupleTableSlot *keep_slot);
 
 
 /* ------------------------------------------------------------------------
@@ -1574,12 +1586,20 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
  *		TABLE_UPDATE_NO_LOGICAL -- force-disables the emitting of logical
  *		decoding information for the tuple.
  *
+ * In/out parameters:
+ *	modified_attrs - in/out; on input, the set of indexed attributes whose
+ *		values changed (FirstLowInvalidHeapAttributeNumber convention).  A
+ *		table AM may use this to choose between HOT and non-HOT storage of the
+ *		new tuple.  On output the AM adds the whole-row attribute
+ *		(TableTupleUpdateAllIndexes) iff it stored the new tuple as an
+ *		independent version requiring a fresh entry in every index; otherwise
+ *		the caller consults each index's own attributes against this set to
+ *		decide per index (the standard HOT / selective-index-update cases).
+ *
  * Output parameters:
  *	slot - newly constructed tuple data to store
  *	tmfd - filled in failure cases (see below)
  *	lockmode - filled with lock mode acquired on tuple
- *	update_indexes - in success cases this is set if new index entries
- *		are required for this tuple; see TU_UpdateIndexes
  *
  * Normal, successful return value is TM_Ok, which means we did actually
  * update it.  Failure return codes are TM_SelfModified, TM_Updated, and
@@ -1600,12 +1620,14 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
 				   CommandId cid, uint32 options,
 				   Snapshot snapshot, Snapshot crosscheck,
 				   bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
-				   const Bitmapset *modified_idx_attrs, TU_UpdateIndexes *update_indexes)
+				   Bitmapset **modified_attrs)
 {
+	Assert(modified_attrs == NULL ||
+		   !bms_is_member(TableTupleUpdateAllIndexes, *modified_attrs));
 	return rel->rd_tableam->tuple_update(rel, otid, slot,
 										 cid, options, snapshot, crosscheck,
 										 wait, tmfd, lockmode,
-										 modified_idx_attrs, update_indexes);
+										 modified_attrs);
 }
 
 /*
@@ -2090,8 +2112,7 @@ extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
 									  Snapshot snapshot);
 extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
 									  TupleTableSlot *slot, Snapshot snapshot,
-									  const Bitmapset *modified_idx_attrs,
-									  TU_UpdateIndexes *update_indexes);
+									  Bitmapset **modified_attrs);
 
 
 /* ----------------------------------------------------------------------------
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 16661bc66d9..e7fccdae32f 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -15,6 +15,7 @@
 #define EXECUTOR_H
 
 #include "access/xlogdefs.h"
+#include "access/itup.h"
 #include "datatype/timestamp.h"
 #include "executor/execdesc.h"
 #include "fmgr.h"
@@ -755,11 +756,13 @@ extern Bitmapset *ExecGetAllUpdatedCols(ResultRelInfo *relinfo, EState *estate);
  */
 extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative);
 extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
+extern void ExecSetIndexUnchanged(ResultRelInfo *resultRelInfo,
+								  const Bitmapset *modified_idx_attrs);
 
 /* flags for ExecInsertIndexTuples */
 #define		EIIT_IS_UPDATE			(1<<0)
 #define		EIIT_NO_DUPE_ERROR		(1<<1)
-#define		EIIT_ONLY_SUMMARIZING	(1<<2)
+#define		EIIT_IS_HOT_INDEXED		(1<<2)
 extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, EState *estate,
 								   uint32 flags, TupleTableSlot *slot,
 								   List *arbiterIndexes,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e64fd8c7ea3..0bbe71a2a89 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -216,10 +216,14 @@ typedef struct IndexInfo
 	bool		ii_NullsNotDistinct;
 	/* is it valid for inserts? */
 	bool		ii_ReadyForInserts;
-	/* IndexUnchanged status determined yet? */
-	bool		ii_CheckedUnchanged;
-	/* aminsert hint, cached for retail inserts */
+	/* aminsert hint: index logically unchanged by UPDATE?  Narrow rule: key
+	 * columns only; INCLUDE columns and the partial-index predicate are not
+	 * considered (expression indexes are treated conservatively). */
 	bool		ii_IndexUnchanged;
+	/* selective UPDATE: does this index need a new entry?  Wide rule: true if
+	 * any key, INCLUDE, expression, or predicate column it references changed
+	 * (or the AM stored an independent new version). */
+	bool		ii_IndexNeedsUpdate;
 	/* are we doing a concurrent index build? */
 	bool		ii_Concurrent;
 	/* did we detect any broken HOT chains? */
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 58a44857f13..28b79370f0d 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -151,7 +151,19 @@ typedef struct PgStat_TableCounts
 	PgStat_Counter tuples_updated;
 	PgStat_Counter tuples_deleted;
 	PgStat_Counter tuples_hot_updated;
+	PgStat_Counter tuples_hot_indexed_updated;
 	PgStat_Counter tuples_newpage_updated;
+
+	/*
+	 * Per-index HOT-indexed update counters.  Maintained on pgstat entries
+	 * keyed on an index oid, not on the owning table's entry.  "skipped"
+	 * counts HOT-indexed updates that skipped this index (key unchanged);
+	 * "matched" counts those that inserted a fresh entry (key changed).
+	 * Summarizing indexes do not contribute to either counter.
+	 */
+	PgStat_Counter tuples_hot_indexed_upd_skipped;
+	PgStat_Counter tuples_hot_indexed_upd_matched;
+
 	bool		truncdropped;
 
 	PgStat_Counter delta_live_tuples;
@@ -218,7 +230,7 @@ typedef struct PgStat_TableXactStatus
  * ------------------------------------------------------------
  */
 
-#define PGSTAT_FILE_FORMAT_ID	0x01A5BCBC
+#define PGSTAT_FILE_FORMAT_ID	0x01A5BCBD
 
 typedef struct PgStat_ArchiverStats
 {
@@ -460,8 +472,13 @@ typedef struct PgStat_StatTabEntry
 	PgStat_Counter tuples_updated;
 	PgStat_Counter tuples_deleted;
 	PgStat_Counter tuples_hot_updated;
+	PgStat_Counter tuples_hot_indexed_updated;
 	PgStat_Counter tuples_newpage_updated;
 
+	/* Per-index HOT-indexed update counters (see PgStat_TableCounts). */
+	PgStat_Counter tuples_hot_indexed_upd_skipped;
+	PgStat_Counter tuples_hot_indexed_upd_matched;
+
 	PgStat_Counter live_tuples;
 	PgStat_Counter dead_tuples;
 	PgStat_Counter mod_since_analyze;
@@ -752,6 +769,16 @@ extern void pgstat_report_analyze(Relation rel,
 		if (pgstat_should_count_relation(rel))						\
 			(rel)->pgstat_info->counts.tuples_returned += (n);		\
 	} while (0)
+#define pgstat_count_hot_indexed_upd_skipped(rel)						\
+	do {															\
+		if (pgstat_should_count_relation(rel))						\
+			(rel)->pgstat_info->counts.tuples_hot_indexed_upd_skipped++;\
+	} while (0)
+#define pgstat_count_hot_indexed_upd_matched(rel)						\
+	do {															\
+		if (pgstat_should_count_relation(rel))						\
+			(rel)->pgstat_info->counts.tuples_hot_indexed_upd_matched++;\
+	} while (0)
 #define pgstat_count_buffer_read(rel)								\
 	do {															\
 		if (pgstat_should_count_relation(rel))						\
@@ -764,7 +791,7 @@ extern void pgstat_report_analyze(Relation rel,
 	} while (0)
 
 extern void pgstat_count_heap_insert(Relation rel, PgStat_Counter n);
-extern void pgstat_count_heap_update(Relation rel, bool hot, bool newpage);
+extern void pgstat_count_heap_update(Relation rel, bool hot, bool hot_indexed, bool newpage);
 extern void pgstat_count_heap_delete(Relation rel);
 extern void pgstat_count_truncate(Relation rel);
 extern void pgstat_update_heap_dead_tuples(Relation rel, int delta);
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 1ef5c1465c6..b60a51258c4 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -164,6 +164,7 @@ typedef struct RelationData
 	Bitmapset  *rd_idattr;		/* included in replica identity index */
 	Bitmapset  *rd_indexedattr; /* all cols referenced by indexes */
 	Bitmapset  *rd_summarizedattr;	/* cols indexed by summarizing indexes */
+	Bitmapset  *rd_exprindexattr;	/* cols referenced by expression indexes */
 
 	PublicationDesc *rd_pubdesc;	/* publication descriptor, or NULL */
 
@@ -217,6 +218,16 @@ typedef struct RelationData
 	Oid		   *rd_indcollation;	/* OIDs of index collations */
 	bytea	  **rd_opcoptions;	/* parsed opclass-specific options */
 
+	/*
+	 * Bitmap of heap attribute numbers referenced by this index (simple keys,
+	 * INCLUDE columns, expression columns, and partial-index predicate
+	 * columns), offset by FirstLowInvalidHeapAttributeNumber. Lazily built by
+	 * RelationGetIndexedAttrs() and cached in rd_indexcxt. Consumers must
+	 * bms_copy before relying on the pointer beyond any potential
+	 * AcceptInvalidationMessages() call.
+	 */
+	Bitmapset  *rd_indattr;
+
 	/*
 	 * rd_amcache is available for index and table AMs to cache private data
 	 * about the relation.  This must be just a cache since it may get reset
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 89788091576..a381aa3e095 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -62,6 +62,19 @@ extern List *RelationGetDummyIndexExpressions(Relation relation);
 extern List *RelationGetIndexPredicate(Relation relation);
 extern bytea **RelationGetIndexAttOptions(Relation relation, bool copy);
 
+/*
+ * RelationGetIndexedAttrs -- return a freshly-palloc'd Bitmapset of every
+ * heap attribute this index references, via keys, INCLUDE columns,
+ * expressions, or partial-index predicates.
+ *
+ * The argument must be an index Relation (not its owning heap).  Attribute
+ * numbers are offset by FirstLowInvalidHeapAttributeNumber.  The result is
+ * palloc'd in the caller's context; bms_free when done.  The relcache
+ * caches its own copy in rd_indexcxt so subsequent calls only pay for the
+ * final bms_copy.
+ */
+extern Bitmapset *RelationGetIndexedAttrs(Relation indexRel);
+
 /*
  * Which set of columns to return by RelationGetIndexAttrBitmap.
  */
@@ -72,11 +85,15 @@ typedef enum IndexAttrBitmapKind
 	INDEX_ATTR_BITMAP_IDENTITY_KEY,
 	INDEX_ATTR_BITMAP_INDEXED,
 	INDEX_ATTR_BITMAP_SUMMARIZED,
+	INDEX_ATTR_BITMAP_EXPRESSION,
 } IndexAttrBitmapKind;
 
 extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation,
 											 IndexAttrBitmapKind attrKind);
 
+extern const Bitmapset *RelationGetIndexAttrBitmapNoCopy(Relation relation,
+														 IndexAttrBitmapKind attrKind);
+
 extern Bitmapset *RelationGetIdentityKeyBitmap(Relation relation);
 
 extern void RelationGetExclusionInfo(Relation indexRelation,
diff --git a/src/test/regress/expected/hot_updates.out b/src/test/regress/expected/hot_updates.out
index 273fe3310da..06979ed31d0 100644
--- a/src/test/regress/expected/hot_updates.out
+++ b/src/test/regress/expected/hot_updates.out
@@ -1,147 +1,132 @@
 --
 -- HOT_UPDATES
--- Test Heap-Only Tuple (HOT) update decisions
+-- Test classic Heap-Only Tuple (HOT) update decisions
 --
--- This test systematically verifies that HOT updates are used when appropriate
--- and avoided when necessary (e.g., when indexed columns are modified).
+-- This file covers HOT decisions that apply identically on a pre-hot-indexed
+-- server: every UPDATE here either leaves all indexed attributes
+-- unchanged or touches only summarizing-index (BRIN) attributes, so the
+-- HOT vs non-HOT choice does not depend on whether Selective Index
+-- Update (hot-indexed) is enabled.  hot-indexed-specific behaviour (UPDATEs that modify
+-- a non-summarizing indexed attribute) is covered in
+-- hot_indexed_updates.sql.
 --
--- We use multiple validation methods:
--- 1. Statistics functions (pg_stat_get_tuples_hot_updated)
--- 2. pageinspect extension for HOT chain examination
--- 3. EXPLAIN to verify index usage after updates
+-- Validation methods:
+--   1. Statistics (pg_stat_get_tuples_hot_updated)
+--   2. pageinspect for HOT chain structure
+--   3. EXPLAIN to confirm the planner still picks the index
 --
 -- Load required extensions
 CREATE EXTENSION IF NOT EXISTS pageinspect;
--- Function to get HOT update count
+-- Sum of committed and in-progress (non-HOT, HOT) update counters.
 CREATE OR REPLACE FUNCTION get_hot_count(rel_name text)
 RETURNS TABLE (
     updates BIGINT,
     hot BIGINT
 ) AS $$
 DECLARE
-  rel_oid oid;
+    rel_oid oid;
 BEGIN
-  rel_oid := rel_name::regclass::oid;
-
-  -- Read both committed and transaction-local stats
-  -- In autocommit mode (default for regression tests), this works correctly
-  -- Note: In explicit transactions (BEGIN/COMMIT), committed stats already
-  -- include flushed updates, so this would double-count. For explicit
-  -- transaction testing, call pg_stat_force_next_flush() before this function.
-  updates := COALESCE(pg_stat_get_tuples_updated(rel_oid), 0) +
-             COALESCE(pg_stat_get_xact_tuples_updated(rel_oid), 0);
-  hot := COALESCE(pg_stat_get_tuples_hot_updated(rel_oid), 0) +
-         COALESCE(pg_stat_get_xact_tuples_hot_updated(rel_oid), 0);
-
-  RETURN NEXT;
+    rel_oid := rel_name::regclass::oid;
+    updates := COALESCE(pg_stat_get_tuples_updated(rel_oid), 0) +
+               COALESCE(pg_stat_get_xact_tuples_updated(rel_oid), 0);
+    hot := COALESCE(pg_stat_get_tuples_hot_updated(rel_oid), 0) +
+           COALESCE(pg_stat_get_xact_tuples_hot_updated(rel_oid), 0);
+    RETURN NEXT;
 END;
 $$ LANGUAGE plpgsql;
--- Check if a tuple is part of a HOT chain (has a predecessor on same page)
+-- True iff target_ctid is the TAIL of a HOT chain on the same page.
 CREATE OR REPLACE FUNCTION has_hot_chain(rel_name text, target_ctid tid)
 RETURNS boolean AS $$
 DECLARE
-  block_num int;
-  page_item record;
+    block_num int;
+    page_item record;
 BEGIN
-  block_num := (target_ctid::text::point)[0]::int;
-
-  -- Look for a different tuple on the same page that points to our target tuple
-  FOR page_item IN
-    SELECT lp, lp_flags, t_ctid
-    FROM heap_page_items(get_raw_page(rel_name, block_num))
-    WHERE lp_flags = 1
-      AND t_ctid IS NOT NULL
-      AND t_ctid = target_ctid
-      AND ('(' || block_num::text || ',' || lp::text || ')')::tid != target_ctid
-  LOOP
-    RETURN true;
-  END LOOP;
-
-  RETURN false;
+    block_num := (target_ctid::text::point)[0]::int;
+    FOR page_item IN
+        SELECT lp, lp_flags, t_ctid
+        FROM heap_page_items(get_raw_page(rel_name, block_num))
+        WHERE lp_flags = 1
+          AND t_ctid IS NOT NULL
+          AND t_ctid = target_ctid
+          AND ('(' || block_num::text || ',' || lp::text || ')')::tid != target_ctid
+    LOOP
+        RETURN true;
+    END LOOP;
+    RETURN false;
 END;
 $$ LANGUAGE plpgsql;
--- Print the HOT chain starting from a given tuple
+-- Emit the HOT chain rooted at start_ctid.
 CREATE OR REPLACE FUNCTION print_hot_chain(rel_name text, start_ctid tid)
 RETURNS TABLE(chain_position int, ctid tid, lp_flags text, t_ctid tid, chain_end boolean) AS
 $$
 #variable_conflict use_column
 DECLARE
-  block_num int;
-  line_ptr int;
-  current_ctid tid := start_ctid;
-  next_ctid tid;
-  position int := 0;
-  max_iterations int := 100;
-  page_item record;
-  found_predecessor boolean := false;
-  flags_name text;
+    block_num int;
+    line_ptr int;
+    current_ctid tid := start_ctid;
+    next_ctid tid;
+    position int := 0;
+    max_iterations int := 100;
+    page_item record;
+    found_predecessor boolean := false;
+    flags_name text;
 BEGIN
-  block_num := (start_ctid::text::point)[0]::int;
-
-  -- Find the predecessor (old tuple pointing to our start_ctid)
-  FOR page_item IN
-    SELECT lp, lp_flags, t_ctid
-    FROM heap_page_items(get_raw_page(rel_name, block_num))
-    WHERE lp_flags = 1
-      AND t_ctid = start_ctid
-  LOOP
-    current_ctid := ('(' || block_num::text || ',' || page_item.lp::text || ')')::tid;
-    found_predecessor := true;
-    EXIT;
-  END LOOP;
-
-  -- If no predecessor found, start with the given ctid
-  IF NOT found_predecessor THEN
-    current_ctid := start_ctid;
-  END IF;
-
-  -- Follow the chain forward
-  WHILE position < max_iterations LOOP
-    line_ptr := (current_ctid::text::point)[1]::int;
+    block_num := (start_ctid::text::point)[0]::int;
 
     FOR page_item IN
-      SELECT lp, lp_flags, t_ctid
-      FROM heap_page_items(get_raw_page(rel_name, block_num))
-      WHERE lp = line_ptr
+        SELECT lp, lp_flags, t_ctid
+        FROM heap_page_items(get_raw_page(rel_name, block_num))
+        WHERE lp_flags = 1
+          AND t_ctid = start_ctid
     LOOP
-      -- Map lp_flags to names
-      flags_name := CASE page_item.lp_flags
-        WHEN 0 THEN 'unused (0)'
-        WHEN 1 THEN 'normal (1)'
-        WHEN 2 THEN 'redirect (2)'
-        WHEN 3 THEN 'dead (3)'
-        ELSE 'unknown (' || page_item.lp_flags::text || ')'
-      END;
-
-      RETURN QUERY SELECT
-        position,
-        current_ctid,
-        flags_name,
-        page_item.t_ctid,
-        (page_item.t_ctid IS NULL OR page_item.t_ctid = current_ctid)::boolean
-      ;
-
-      IF page_item.t_ctid IS NULL OR page_item.t_ctid = current_ctid THEN
-        RETURN;
-      END IF;
-
-      next_ctid := page_item.t_ctid;
-
-      IF (next_ctid::text::point)[0]::int != block_num THEN
-        RETURN;
-      END IF;
-
-      current_ctid := next_ctid;
-      position := position + 1;
+        current_ctid := ('(' || block_num::text || ',' || page_item.lp::text || ')')::tid;
+        found_predecessor := true;
+        EXIT;
     END LOOP;
-
-    IF position = 0 THEN
-      RETURN;
+    IF NOT found_predecessor THEN
+        current_ctid := start_ctid;
     END IF;
-  END LOOP;
+
+    WHILE position < max_iterations LOOP
+        line_ptr := (current_ctid::text::point)[1]::int;
+        FOR page_item IN
+            SELECT lp, lp_flags, t_ctid
+            FROM heap_page_items(get_raw_page(rel_name, block_num))
+            WHERE lp = line_ptr
+        LOOP
+            flags_name := CASE page_item.lp_flags
+                WHEN 0 THEN 'unused (0)'
+                WHEN 1 THEN 'normal (1)'
+                WHEN 2 THEN 'redirect (2)'
+                WHEN 3 THEN 'dead (3)'
+                ELSE 'unknown (' || page_item.lp_flags::text || ')'
+            END;
+            RETURN QUERY SELECT
+                position,
+                current_ctid,
+                flags_name,
+                page_item.t_ctid,
+                (page_item.t_ctid IS NULL OR page_item.t_ctid = current_ctid)::boolean;
+
+            IF page_item.t_ctid IS NULL OR page_item.t_ctid = current_ctid THEN
+                RETURN;
+            END IF;
+            next_ctid := page_item.t_ctid;
+            IF (next_ctid::text::point)[0]::int != block_num THEN
+                RETURN;
+            END IF;
+            current_ctid := next_ctid;
+            position := position + 1;
+        END LOOP;
+        IF position = 0 THEN
+            RETURN;
+        END IF;
+    END LOOP;
 END;
 $$ LANGUAGE plpgsql;
--- Basic HOT update (update non-indexed column)
+-- ---------------------------------------------------------------------------
+-- 1. Basic HOT: update of a non-indexed column
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test (
     id int PRIMARY KEY,
     indexed_col int,
@@ -151,239 +136,218 @@ CREATE INDEX hot_test_indexed_idx ON hot_test(indexed_col);
 INSERT INTO hot_test VALUES (1, 100, 'initial');
 INSERT INTO hot_test VALUES (2, 200, 'initial');
 INSERT INTO hot_test VALUES (3, 300, 'initial');
--- Get baseline
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
 SELECT * FROM get_hot_count('hot_test');
  updates | hot 
 ---------+-----
        0 |   0
 (1 row)
 
--- Should be HOT updates (only non-indexed column modified)
+-- Three classic HOT updates (non-indexed col).
 UPDATE hot_test SET non_indexed_col = 'updated1' WHERE id = 1;
 UPDATE hot_test SET non_indexed_col = 'updated2' WHERE id = 2;
 UPDATE hot_test SET non_indexed_col = 'updated3' WHERE id = 3;
--- Verify HOT updates occurred
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
 SELECT * FROM get_hot_count('hot_test');
  updates | hot 
 ---------+-----
        3 |   3
 (1 row)
 
--- Dump the HOT chain before VACUUMing
-WITH current_tuple AS (
-  SELECT ctid FROM hot_test WHERE id = 1
-)
-SELECT
-  has_hot_chain('hot_test', current_tuple.ctid) AS has_chain,
-  chain_position,
-  print_hot_chain.ctid,
-  lp_flags,
-  t_ctid
-FROM current_tuple,
-LATERAL print_hot_chain('hot_test', current_tuple.ctid);
+-- Chain-of-1 on id=1 still has a predecessor line pointer.
+WITH current_tuple AS (SELECT ctid FROM hot_test WHERE id = 1)
+SELECT has_hot_chain('hot_test', current_tuple.ctid) AS has_chain,
+       chain_position, print_hot_chain.ctid, lp_flags, t_ctid
+FROM current_tuple, LATERAL print_hot_chain('hot_test', current_tuple.ctid);
  has_chain | chain_position | ctid  |  lp_flags  | t_ctid 
 -----------+----------------+-------+------------+--------
  t         |              0 | (0,1) | normal (1) | (0,4)
  t         |              1 | (0,4) | normal (1) | (0,4)
 (2 rows)
 
--- Vacuum the relation, expect the HOT chain to collapse
+-- VACUUM collapses the chain.
 VACUUM hot_test;
--- Show that there is no chain after vacuum
-WITH current_tuple AS (
-  SELECT ctid FROM hot_test WHERE id = 1
-)
-SELECT
-  has_hot_chain('hot_test', current_tuple.ctid) AS has_chain,
-  chain_position,
-  print_hot_chain.ctid,
-  lp_flags,
-  t_ctid
-FROM current_tuple,
-LATERAL print_hot_chain('hot_test', current_tuple.ctid);
+WITH current_tuple AS (SELECT ctid FROM hot_test WHERE id = 1)
+SELECT has_hot_chain('hot_test', current_tuple.ctid) AS has_chain,
+       chain_position, print_hot_chain.ctid, lp_flags, t_ctid
+FROM current_tuple, LATERAL print_hot_chain('hot_test', current_tuple.ctid);
  has_chain | chain_position | ctid  |  lp_flags  | t_ctid 
 -----------+----------------+-------+------------+--------
  f         |              0 | (0,4) | normal (1) | (0,4)
 (1 row)
 
--- Non-HOT update (update indexed column)
-UPDATE hot_test SET indexed_col = 150 WHERE id = 1;
+DROP TABLE hot_test;
+-- ---------------------------------------------------------------------------
+-- 2. Summarizing indexes (BRIN) do not block HOT
+-- ---------------------------------------------------------------------------
+CREATE TABLE hot_test (
+    id int PRIMARY KEY,
+    ts timestamp,
+    value int,
+    brin_col int
+) WITH (fillfactor = 50);
+CREATE INDEX hot_test_ts_brin ON hot_test USING brin(ts);
+CREATE INDEX hot_test_brin_col_brin ON hot_test USING brin(brin_col);
+INSERT INTO hot_test VALUES (1, '2024-01-01', 100, 1000);
+-- BRIN columns are summarizing; updating them stays classic HOT even
+-- though their values change.
+UPDATE hot_test SET ts = '2024-01-02', brin_col = 2000 WHERE id = 1;
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
 SELECT * FROM get_hot_count('hot_test');
  updates | hot 
 ---------+-----
-       4 |   3
+       1 |   1
 (1 row)
 
--- Verify index was updated (new value findable)
-SET enable_seqscan = off;
-EXPLAIN (COSTS OFF) SELECT id, indexed_col FROM hot_test WHERE indexed_col = 150;
-                    QUERY PLAN                     
----------------------------------------------------
- Index Scan using hot_test_indexed_idx on hot_test
-   Index Cond: (indexed_col = 150)
-(2 rows)
-
-SELECT id, indexed_col FROM hot_test WHERE indexed_col = 150;
- id | indexed_col 
-----+-------------
-  1 |         150
+-- Non-indexed column: also HOT.
+UPDATE hot_test SET value = 200 WHERE id = 1;
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
 (1 row)
 
--- Verify old value no longer in index
-EXPLAIN (COSTS OFF) SELECT id FROM hot_test WHERE indexed_col = 100;
-                    QUERY PLAN                     
----------------------------------------------------
- Index Scan using hot_test_indexed_idx on hot_test
-   Index Cond: (indexed_col = 100)
-(2 rows)
-
-SELECT id FROM hot_test WHERE indexed_col = 100;
- id 
-----
-(0 rows)
+SELECT * FROM get_hot_count('hot_test');
+ updates | hot 
+---------+-----
+       2 |   2
+(1 row)
 
-RESET enable_seqscan;
--- All-or-none property: updating one indexed column requires ALL index updates
 DROP TABLE hot_test;
+-- ---------------------------------------------------------------------------
+-- 3. TOAST participates in HOT (non-indexed column paths only)
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test (
     id int PRIMARY KEY,
-    col_a int,
-    col_b int,
-    col_c int,
-    non_indexed text
+    indexed_col int,
+    large_text text,
+    small_text text
 ) WITH (fillfactor = 50);
-CREATE INDEX hot_test_a_idx ON hot_test(col_a);
-CREATE INDEX hot_test_b_idx ON hot_test(col_b);
-CREATE INDEX hot_test_c_idx ON hot_test(col_c);
-INSERT INTO hot_test VALUES (1, 10, 20, 30, 'initial');
--- Update only col_a - should NOT be HOT because an indexed column changed
--- This means ALL indexes must be updated (all-or-none property)
-UPDATE hot_test SET col_a = 15 WHERE id = 1;
+CREATE INDEX hot_test_idx ON hot_test(indexed_col);
+INSERT INTO hot_test VALUES (1, 100, repeat('x', 3000), 'small');
+-- Non-indexed, non-TOAST column: HOT.
+UPDATE hot_test SET small_text = 'updated';
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
 SELECT * FROM get_hot_count('hot_test');
  updates | hot 
 ---------+-----
-       1 |   0
+       1 |   1
+(1 row)
+
+-- TOAST column, indexed_col unchanged: HOT.
+UPDATE hot_test SET large_text = repeat('y', 3000);
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
 (1 row)
 
--- Now update only non-indexed column - should be HOT
-UPDATE hot_test SET non_indexed = 'updated';
 SELECT * FROM get_hot_count('hot_test');
  updates | hot 
 ---------+-----
-       2 |   1
+       2 |   2
 (1 row)
 
--- Partial index: both old and new outside predicate (conservative = non-HOT)
 DROP TABLE hot_test;
+-- ---------------------------------------------------------------------------
+-- 4. Partial index where update leaves indexed attrs unchanged
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test (
     id int PRIMARY KEY,
     status text,
     data text
 ) WITH (fillfactor = 50);
--- Partial index only covers status = 'active'
 CREATE INDEX hot_test_active_idx ON hot_test(status) WHERE status = 'active';
 INSERT INTO hot_test VALUES (1, 'active', 'data1');
 INSERT INTO hot_test VALUES (2, 'inactive', 'data2');
 INSERT INTO hot_test VALUES (3, 'deleted', 'data3');
--- Update non-indexed column on 'active' row (in predicate, status unchanged)
--- Should be HOT
+-- Update data on a row whose status matches the partial predicate: HOT.
 UPDATE hot_test SET data = 'updated1' WHERE id = 1;
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
 SELECT * FROM get_hot_count('hot_test');
  updates | hot 
 ---------+-----
        1 |   1
 (1 row)
 
--- Update non-indexed column on 'inactive' row (outside predicate)
--- Should be HOT
+-- Update data on a row outside the predicate: HOT.
 UPDATE hot_test SET data = 'updated2' WHERE id = 2;
-SELECT * FROM get_hot_count('hot_test');
- updates | hot 
----------+-----
-       2 |   2
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
 (1 row)
 
--- Update status from 'inactive' to 'deleted' (both outside predicate)
--- PostgreSQL is conservative: heap insert happens before predicate check
--- So this is NON-HOT even though both values are outside predicate
-UPDATE hot_test SET status = 'deleted' WHERE id = 2;
 SELECT * FROM get_hot_count('hot_test');
  updates | hot 
 ---------+-----
-       3 |   2
+       2 |   2
 (1 row)
 
--- Verify index still works for 'active' rows
 SELECT id, status FROM hot_test WHERE status = 'active';
  id | status 
 ----+--------
   1 | active
 (1 row)
 
--- Only BRIN (summarizing) indexes on non-PK columns
 DROP TABLE hot_test;
+-- ---------------------------------------------------------------------------
+-- 5. Multi-column btree: update of non-indexed column
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test (
     id int PRIMARY KEY,
-    ts timestamp,
-    value int,
-    brin_col int
-) WITH (fillfactor = 50);
-CREATE INDEX hot_test_ts_brin ON hot_test USING brin(ts);
-CREATE INDEX hot_test_brin_col_brin ON hot_test USING brin(brin_col);
-INSERT INTO hot_test VALUES (1, '2024-01-01', 100, 1000);
--- Update both BRIN columns - should still be HOT (only summarizing indexes)
-UPDATE hot_test SET ts = '2024-01-02', brin_col = 2000 WHERE id = 1;
-SELECT * FROM get_hot_count('hot_test');
- updates | hot 
----------+-----
-       1 |   1
-(1 row)
-
--- Update non-indexed column - should also be HOT
-UPDATE hot_test SET value = 200 WHERE id = 1;
-SELECT * FROM get_hot_count('hot_test');
- updates | hot 
----------+-----
-       2 |   2
-(1 row)
-
--- TOAST and HOT: TOASTed columns can participate in HOT
-DROP TABLE hot_test;
-CREATE TABLE hot_test (
-    id int PRIMARY KEY,
-    indexed_col int,
-    large_text text,
-    small_text text
+    col_a int,
+    col_b int,
+    col_c int,
+    data text
 ) WITH (fillfactor = 50);
-CREATE INDEX hot_test_idx ON hot_test(indexed_col);
--- Insert row with TOASTed column (> 2KB)
-INSERT INTO hot_test VALUES (1, 100, repeat('x', 3000), 'small');
--- Update non-indexed, non-TOASTed column - should be HOT
-UPDATE hot_test SET small_text = 'updated';
-SELECT * FROM get_hot_count('hot_test');
- updates | hot 
----------+-----
-       1 |   1
+CREATE INDEX hot_test_ab_idx ON hot_test(col_a, col_b);
+INSERT INTO hot_test VALUES (1, 10, 20, 30, 'data');
+-- col_c not in any index: HOT.
+UPDATE hot_test SET col_c = 35;
+-- data not in any index: HOT.
+UPDATE hot_test SET data = 'updated';
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
 (1 row)
 
--- Update TOASTed column - should be HOT if indexed column unchanged
-UPDATE hot_test SET large_text = repeat('y', 3000);
 SELECT * FROM get_hot_count('hot_test');
  updates | hot 
 ---------+-----
        2 |   2
 (1 row)
 
--- Update indexed column - should NOT be HOT
-UPDATE hot_test SET indexed_col = 200;
-SELECT * FROM get_hot_count('hot_test');
- updates | hot 
----------+-----
-       3 |   2
-(1 row)
-
--- Unique constraint (unique index) behaves like regular index
 DROP TABLE hot_test;
+-- ---------------------------------------------------------------------------
+-- 6. Unique index: update of non-indexed column + uniqueness enforcement
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test (
     id int PRIMARY KEY,
     unique_col int UNIQUE,
@@ -391,15 +355,19 @@ CREATE TABLE hot_test (
 ) WITH (fillfactor = 50);
 INSERT INTO hot_test VALUES (1, 100, 'data1');
 INSERT INTO hot_test VALUES (2, 200, 'data2');
--- Update data (non-indexed) - should be HOT
 UPDATE hot_test SET data = 'updated';
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
 SELECT * FROM get_hot_count('hot_test');
  updates | hot 
 ---------+-----
        2 |   2
 (1 row)
 
--- Verify unique constraint still enforced
 SELECT id, unique_col, data FROM hot_test ORDER BY id;
  id | unique_col |  data   
 ----+------------+---------
@@ -407,60 +375,14 @@ SELECT id, unique_col, data FROM hot_test ORDER BY id;
   2 |        200 | updated
 (2 rows)
 
--- This should fail (unique violation)
+-- Unique constraint still enforced on any path.
 UPDATE hot_test SET unique_col = 100 WHERE id = 2;
 ERROR:  duplicate key value violates unique constraint "hot_test_unique_col_key"
 DETAIL:  Key (unique_col)=(100) already exists.
--- Multi-column index: any column change = non-HOT
 DROP TABLE hot_test;
-CREATE TABLE hot_test (
-    id int PRIMARY KEY,
-    col_a int,
-    col_b int,
-    col_c int,
-    data text
-) WITH (fillfactor = 50);
-CREATE INDEX hot_test_ab_idx ON hot_test(col_a, col_b);
-INSERT INTO hot_test VALUES (1, 10, 20, 30, 'data');
--- Update col_a (part of multi-column index) - should NOT be HOT
-UPDATE hot_test SET col_a = 15;
-SELECT * FROM get_hot_count('hot_test');
- updates | hot 
----------+-----
-       1 |   0
-(1 row)
-
--- Reset
-UPDATE hot_test SET col_a = 10;
--- Update col_b (part of multi-column index) - should NOT be HOT
-UPDATE hot_test SET col_b = 25;
-SELECT * FROM get_hot_count('hot_test');
- updates | hot 
----------+-----
-       3 |   0
-(1 row)
-
--- Reset
-UPDATE hot_test SET col_b = 20;
-SELECT * FROM get_hot_count('hot_test');
- updates | hot 
----------+-----
-       4 |   0
-(1 row)
-
--- Update col_c (not indexed) - should be HOT
-UPDATE hot_test SET col_c = 35;
--- Update data (not indexed) - should be HOT
-UPDATE hot_test SET data = 'updated';
-SELECT * FROM get_hot_count('hot_test');
- updates | hot 
----------+-----
-       6 |   2
-(1 row)
-
--- Partitioned tables: HOT works within partitions
-DROP TABLE IF EXISTS hot_test_partitioned CASCADE;
-NOTICE:  table "hot_test_partitioned" does not exist, skipping
+-- ---------------------------------------------------------------------------
+-- 7. Partitioned tables: HOT within a partition
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test_partitioned (
     id int,
     partition_key int,
@@ -475,23 +397,32 @@ CREATE TABLE hot_test_part2 PARTITION OF hot_test_partitioned
 CREATE INDEX hot_test_part_idx ON hot_test_partitioned(indexed_col);
 INSERT INTO hot_test_partitioned VALUES (1, 50, 100, 'initial1');
 INSERT INTO hot_test_partitioned VALUES (2, 150, 200, 'initial2');
--- Update in partition 1 (non-indexed column) - should be HOT
 UPDATE hot_test_partitioned SET data = 'updated1' WHERE id = 1;
--- Update in partition 2 (non-indexed column) - should be HOT
 UPDATE hot_test_partitioned SET data = 'updated2' WHERE id = 2;
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
 SELECT * FROM get_hot_count('hot_test_part1');
  updates | hot 
 ---------+-----
        1 |   1
 (1 row)
 
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
 SELECT * FROM get_hot_count('hot_test_part2');
  updates | hot 
 ---------+-----
        1 |   1
 (1 row)
 
--- Verify indexes work on partitions
 SELECT id FROM hot_test_partitioned WHERE indexed_col = 100;
  id 
 ----
@@ -504,242 +435,100 @@ SELECT id FROM hot_test_partitioned WHERE indexed_col = 200;
   2
 (1 row)
 
--- Update indexed column in partition - should NOT be HOT
-UPDATE hot_test_partitioned SET indexed_col = 150 WHERE id = 1;
-SELECT * FROM get_hot_count('hot_test_part1');
- updates | hot 
----------+-----
-       2 |   1
-(1 row)
-
--- Verify index was updated
-SELECT id FROM hot_test_partitioned WHERE indexed_col = 150;
- id 
-----
-  1
-(1 row)
-
--- ============================================================================
--- Trigger modifications: heap_modify_tuple() and HOT
--- ============================================================================
--- Test that we correctly detect when triggers modify indexed columns via
--- heap_modify_tuple(), even when those columns aren't in the UPDATE's SET clause
-CREATE TABLE hot_trigger_test (
-    id int PRIMARY KEY,
-    triggered_col int,
-    data text
-) WITH (fillfactor = 50);
-CREATE INDEX hot_trigger_idx ON hot_trigger_test(triggered_col);
--- Create a trigger that modifies an indexed column
-CREATE OR REPLACE FUNCTION modify_triggered_col()
-RETURNS TRIGGER AS $$
-BEGIN
-    NEW.triggered_col = NEW.triggered_col + 1;
-    RETURN NEW;
-END;
-$$ LANGUAGE plpgsql;
-CREATE TRIGGER before_update_modify
-    BEFORE UPDATE ON hot_trigger_test
-    FOR EACH ROW
-    EXECUTE FUNCTION modify_triggered_col();
-INSERT INTO hot_trigger_test VALUES (1, 100, 'initial');
-SELECT * FROM get_hot_count('hot_trigger_test');
- updates | hot 
----------+-----
-       0 |   0
-(1 row)
-
--- Update only data column, but trigger modifies indexed column
--- Should NOT be HOT because trigger modified an indexed column
-UPDATE hot_trigger_test SET data = 'updated' WHERE id = 1;
--- Verify it was NOT a HOT update (indexed column was modified by trigger)
-SELECT * FROM get_hot_count('hot_trigger_test');
- updates | hot 
----------+-----
-       1 |   0
-(1 row)
-
--- Verify the triggered column was actually modified
-SELECT triggered_col FROM hot_trigger_test WHERE id = 1;
- triggered_col 
----------------
-           101
-(1 row)
-
-DROP TABLE hot_trigger_test CASCADE;
-DROP FUNCTION modify_triggered_col();
--- ============================================================================
--- JSONB expression indexes and sub-attribute tracking
--- ============================================================================
--- Test that updates to non-indexed JSONB paths can be HOT updates
+DROP TABLE hot_test_partitioned CASCADE;
+-- ---------------------------------------------------------------------------
+-- 8. JSONB expression index: non-indexed path change is HOT
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_jsonb_test (
     id int PRIMARY KEY,
     data jsonb
 ) WITH (fillfactor = 50);
--- Create expression index on a specific JSON path
 CREATE INDEX hot_jsonb_name_idx ON hot_jsonb_test ((data->>'name'));
 INSERT INTO hot_jsonb_test VALUES
     (1, '{"name":"Alice","age":30,"city":"NYC"}'),
     (2, '{"name":"Bob","age":25,"city":"LA"}');
-SELECT * FROM get_hot_count('hot_jsonb_test');
- updates | hot 
----------+-----
-       0 |   0
+-- The jsonb column is the expression index's input, so HOT-indexed is
+-- disqualified (expression indexes are not yet supported) and the jsonb
+-- change blocks classic HOT: non-HOT update.
+UPDATE hot_jsonb_test SET data = jsonb_set(data, '{age}', '31') WHERE id = 1;
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
 (1 row)
 
--- Update non-indexed JSON path (age) - should be HOT after instrumentation
-UPDATE hot_jsonb_test SET data = jsonb_set(data, '{age}', '31') WHERE id = 1;
 SELECT * FROM get_hot_count('hot_jsonb_test');
  updates | hot 
 ---------+-----
        1 |   0
 (1 row)
 
--- Update indexed JSON path (name) - should NOT be HOT
-UPDATE hot_jsonb_test SET data = jsonb_set(data, '{name}', '"Alice2"') WHERE id = 1;
+-- Likewise non-HOT: expression index disqualifies HOT-indexed.
+UPDATE hot_jsonb_test SET data = data - 'city' WHERE id = 2;
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
 SELECT * FROM get_hot_count('hot_jsonb_test');
  updates | hot 
 ---------+-----
        2 |   0
 (1 row)
 
--- Verify index works
-SELECT id FROM hot_jsonb_test WHERE data->>'name' = 'Alice2';
- id 
-----
-  1
+-- Likewise non-HOT: expression index disqualifies HOT-indexed.
+UPDATE hot_jsonb_test SET data = jsonb_insert(data, '{country}', '"USA"') WHERE id = 2;
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
 (1 row)
 
--- Test jsonb_delete on non-indexed path - should be HOT after instrumentation
-UPDATE hot_jsonb_test SET data = data - 'city' WHERE id = 2;
 SELECT * FROM get_hot_count('hot_jsonb_test');
  updates | hot 
 ---------+-----
        3 |   0
 (1 row)
 
--- Test jsonb_insert on non-indexed path - should be HOT after instrumentation
-UPDATE hot_jsonb_test SET data = jsonb_insert(data, '{country}', '"USA"') WHERE id = 2;
-SELECT * FROM get_hot_count('hot_jsonb_test');
- updates | hot 
----------+-----
-       4 |   0
-(1 row)
-
 DROP TABLE hot_jsonb_test;
--- ============================================================================
--- XML expression indexes and sub-attribute tracking
--- ============================================================================
--- Test that updates to non-indexed XML paths can be HOT updates
-CREATE TABLE hot_xml_test (
-    id int PRIMARY KEY,
-    doc xml
-) WITH (fillfactor = 50);
--- Create expression index on a specific XPath
-CREATE INDEX hot_xml_name_idx ON hot_xml_test ((xpath('/person/name/text()', doc)));
-INSERT INTO hot_xml_test VALUES
-    (1, '<person><name>Alice</name><age>30</age></person>'),
-    (2, '<person><name>Bob</name><age>25</age></person>');
-ERROR:  could not identify a comparison function for type xml
-SELECT * FROM get_hot_count('hot_xml_test');
- updates | hot 
----------+-----
-       0 |   0
-(1 row)
-
--- Update non-indexed XPath (age) - behavior depends on XML comparison fallback
--- Full XML value replacement means non-indexed path updates still require index comparison
-UPDATE hot_xml_test SET doc = '<person><name>Alice</name><age>31</age></person>' WHERE id = 1;
-SELECT * FROM get_hot_count('hot_xml_test');
- updates | hot 
----------+-----
-       0 |   0
-(1 row)
-
--- Update indexed XPath (name) - should NOT be HOT
-UPDATE hot_xml_test SET doc = '<person><name>Alice2</name><age>31</age></person>' WHERE id = 1;
-SELECT * FROM get_hot_count('hot_xml_test');
- updates | hot 
----------+-----
-       0 |   0
-(1 row)
-
--- Verify index works
-SELECT id FROM hot_xml_test WHERE xpath('/person/name/text()', doc) = ARRAY['Alice2'::text];
-ERROR:  operator does not exist: xml[] = text[]
-LINE 1: ..._xml_test WHERE xpath('/person/name/text()', doc) = ARRAY['A...
-                                                             ^
-DETAIL:  No operator of that name accepts the given argument types.
-HINT:  You might need to add explicit type casts.
-DROP TABLE hot_xml_test;
--- ============================================================================
--- GIN indexes and amcomparedatums for JSONB
--- ============================================================================
--- Test that GIN indexes can use amcomparedatums to enable HOT when extracted keys match
+-- ---------------------------------------------------------------------------
+-- 9. A change to a GIN-indexed column is HOT-indexed
+--
+-- The read side filters a stale leaf via the crossed-attribute bitmap, which
+-- is access-method agnostic, so a GIN-covered column is HOT-indexed like any
+-- other: only the GIN index is maintained, and a GIN scan (which rechecks on
+-- the heap) returns correct results across the chain.
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_gin_test (
     id int PRIMARY KEY,
     tags text[],
     properties jsonb
 ) WITH (fillfactor = 50);
--- GIN index on text array
 CREATE INDEX hot_gin_tags_idx ON hot_gin_test USING gin (tags);
--- GIN index on JSONB (jsonb_ops - keys and values)
 CREATE INDEX hot_gin_props_idx ON hot_gin_test USING gin (properties);
 INSERT INTO hot_gin_test VALUES
     (1, ARRAY['tag1', 'tag2'], '{"key1":"val1","key2":"val2"}'),
     (2, ARRAY['tag3', 'tag4'], '{"key3":"val3","key4":"val4"}');
-SELECT * FROM get_hot_count('hot_gin_test');
- updates | hot 
----------+-----
-       0 |   0
-(1 row)
-
--- Update that changes tag order but not content - after amcomparedatums should be HOT
--- (GIN extracts same keys, just different order)
+-- Reorder tags: a GIN-covered column changes, so this is HOT-indexed.
 UPDATE hot_gin_test SET tags = ARRAY['tag2', 'tag1'] WHERE id = 1;
-SELECT * FROM get_hot_count('hot_gin_test');
- updates | hot 
----------+-----
-       1 |   0
-(1 row)
-
--- Update JSONB value (not key) - after amcomparedatums may be HOT or non-HOT
--- depending on GIN operator class (jsonb_ops indexes both keys and values)
-UPDATE hot_gin_test SET properties = '{"key1":"val1_new","key2":"val2"}' WHERE id = 1;
-SELECT * FROM get_hot_count('hot_gin_test');
- updates | hot 
----------+-----
-       2 |   0
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
 (1 row)
 
--- Add new tag - should NOT be HOT (different extracted keys)
-UPDATE hot_gin_test SET tags = ARRAY['tag2', 'tag1', 'tag5'] WHERE id = 1;
 SELECT * FROM get_hot_count('hot_gin_test');
  updates | hot 
 ---------+-----
-       3 |   0
-(1 row)
-
--- Verify GIN indexes work
-SELECT id FROM hot_gin_test WHERE tags @> ARRAY['tag5'];
- id 
-----
-  1
-(1 row)
-
-SELECT id FROM hot_gin_test WHERE properties @> '{"key1":"val1_new"}';
- id 
-----
-  1
+       1 |   1
 (1 row)
 
 DROP TABLE hot_gin_test;
--- ============================================================================
+-- ---------------------------------------------------------------------------
 -- Cleanup
--- ============================================================================
-DROP TABLE IF EXISTS hot_test;
-DROP TABLE IF EXISTS hot_test_partitioned CASCADE;
-DROP FUNCTION IF EXISTS has_hot_chain(text, tid);
-DROP FUNCTION IF EXISTS print_hot_chain(text, tid);
-DROP FUNCTION IF EXISTS get_hot_count(text);
+-- ---------------------------------------------------------------------------
+DROP FUNCTION has_hot_chain(text, tid);
+DROP FUNCTION print_hot_chain(text, tid);
+DROP FUNCTION get_hot_count(text);
 DROP EXTENSION pageinspect;
diff --git a/src/test/regress/sql/hot_updates.sql b/src/test/regress/sql/hot_updates.sql
index a8894006177..35ce7e1cdcd 100644
--- a/src/test/regress/sql/hot_updates.sql
+++ b/src/test/regress/sql/hot_updates.sql
@@ -1,354 +1,258 @@
 --
 -- HOT_UPDATES
--- Test Heap-Only Tuple (HOT) update decisions
+-- Test classic Heap-Only Tuple (HOT) update decisions
 --
--- This test systematically verifies that HOT updates are used when appropriate
--- and avoided when necessary (e.g., when indexed columns are modified).
+-- This file covers HOT decisions that apply identically on a pre-hot-indexed
+-- server: every UPDATE here either leaves all indexed attributes
+-- unchanged or touches only summarizing-index (BRIN) attributes, so the
+-- HOT vs non-HOT choice does not depend on whether Selective Index
+-- Update (hot-indexed) is enabled.  hot-indexed-specific behaviour (UPDATEs that modify
+-- a non-summarizing indexed attribute) is covered in
+-- hot_indexed_updates.sql.
 --
--- We use multiple validation methods:
--- 1. Statistics functions (pg_stat_get_tuples_hot_updated)
--- 2. pageinspect extension for HOT chain examination
--- 3. EXPLAIN to verify index usage after updates
+-- Validation methods:
+--   1. Statistics (pg_stat_get_tuples_hot_updated)
+--   2. pageinspect for HOT chain structure
+--   3. EXPLAIN to confirm the planner still picks the index
 --
 
 -- Load required extensions
 CREATE EXTENSION IF NOT EXISTS pageinspect;
 
--- Function to get HOT update count
+-- Sum of committed and in-progress (non-HOT, HOT) update counters.
 CREATE OR REPLACE FUNCTION get_hot_count(rel_name text)
 RETURNS TABLE (
     updates BIGINT,
     hot BIGINT
 ) AS $$
 DECLARE
-  rel_oid oid;
+    rel_oid oid;
 BEGIN
-  rel_oid := rel_name::regclass::oid;
-
-  -- Read both committed and transaction-local stats
-  -- In autocommit mode (default for regression tests), this works correctly
-  -- Note: In explicit transactions (BEGIN/COMMIT), committed stats already
-  -- include flushed updates, so this would double-count. For explicit
-  -- transaction testing, call pg_stat_force_next_flush() before this function.
-  updates := COALESCE(pg_stat_get_tuples_updated(rel_oid), 0) +
-             COALESCE(pg_stat_get_xact_tuples_updated(rel_oid), 0);
-  hot := COALESCE(pg_stat_get_tuples_hot_updated(rel_oid), 0) +
-         COALESCE(pg_stat_get_xact_tuples_hot_updated(rel_oid), 0);
-
-  RETURN NEXT;
+    rel_oid := rel_name::regclass::oid;
+    updates := COALESCE(pg_stat_get_tuples_updated(rel_oid), 0) +
+               COALESCE(pg_stat_get_xact_tuples_updated(rel_oid), 0);
+    hot := COALESCE(pg_stat_get_tuples_hot_updated(rel_oid), 0) +
+           COALESCE(pg_stat_get_xact_tuples_hot_updated(rel_oid), 0);
+    RETURN NEXT;
 END;
 $$ LANGUAGE plpgsql;
 
--- Check if a tuple is part of a HOT chain (has a predecessor on same page)
+-- True iff target_ctid is the TAIL of a HOT chain on the same page.
 CREATE OR REPLACE FUNCTION has_hot_chain(rel_name text, target_ctid tid)
 RETURNS boolean AS $$
 DECLARE
-  block_num int;
-  page_item record;
+    block_num int;
+    page_item record;
 BEGIN
-  block_num := (target_ctid::text::point)[0]::int;
-
-  -- Look for a different tuple on the same page that points to our target tuple
-  FOR page_item IN
-    SELECT lp, lp_flags, t_ctid
-    FROM heap_page_items(get_raw_page(rel_name, block_num))
-    WHERE lp_flags = 1
-      AND t_ctid IS NOT NULL
-      AND t_ctid = target_ctid
-      AND ('(' || block_num::text || ',' || lp::text || ')')::tid != target_ctid
-  LOOP
-    RETURN true;
-  END LOOP;
-
-  RETURN false;
+    block_num := (target_ctid::text::point)[0]::int;
+    FOR page_item IN
+        SELECT lp, lp_flags, t_ctid
+        FROM heap_page_items(get_raw_page(rel_name, block_num))
+        WHERE lp_flags = 1
+          AND t_ctid IS NOT NULL
+          AND t_ctid = target_ctid
+          AND ('(' || block_num::text || ',' || lp::text || ')')::tid != target_ctid
+    LOOP
+        RETURN true;
+    END LOOP;
+    RETURN false;
 END;
 $$ LANGUAGE plpgsql;
 
--- Print the HOT chain starting from a given tuple
+-- Emit the HOT chain rooted at start_ctid.
 CREATE OR REPLACE FUNCTION print_hot_chain(rel_name text, start_ctid tid)
 RETURNS TABLE(chain_position int, ctid tid, lp_flags text, t_ctid tid, chain_end boolean) AS
 $$
 #variable_conflict use_column
 DECLARE
-  block_num int;
-  line_ptr int;
-  current_ctid tid := start_ctid;
-  next_ctid tid;
-  position int := 0;
-  max_iterations int := 100;
-  page_item record;
-  found_predecessor boolean := false;
-  flags_name text;
+    block_num int;
+    line_ptr int;
+    current_ctid tid := start_ctid;
+    next_ctid tid;
+    position int := 0;
+    max_iterations int := 100;
+    page_item record;
+    found_predecessor boolean := false;
+    flags_name text;
 BEGIN
-  block_num := (start_ctid::text::point)[0]::int;
-
-  -- Find the predecessor (old tuple pointing to our start_ctid)
-  FOR page_item IN
-    SELECT lp, lp_flags, t_ctid
-    FROM heap_page_items(get_raw_page(rel_name, block_num))
-    WHERE lp_flags = 1
-      AND t_ctid = start_ctid
-  LOOP
-    current_ctid := ('(' || block_num::text || ',' || page_item.lp::text || ')')::tid;
-    found_predecessor := true;
-    EXIT;
-  END LOOP;
-
-  -- If no predecessor found, start with the given ctid
-  IF NOT found_predecessor THEN
-    current_ctid := start_ctid;
-  END IF;
-
-  -- Follow the chain forward
-  WHILE position < max_iterations LOOP
-    line_ptr := (current_ctid::text::point)[1]::int;
+    block_num := (start_ctid::text::point)[0]::int;
 
     FOR page_item IN
-      SELECT lp, lp_flags, t_ctid
-      FROM heap_page_items(get_raw_page(rel_name, block_num))
-      WHERE lp = line_ptr
+        SELECT lp, lp_flags, t_ctid
+        FROM heap_page_items(get_raw_page(rel_name, block_num))
+        WHERE lp_flags = 1
+          AND t_ctid = start_ctid
     LOOP
-      -- Map lp_flags to names
-      flags_name := CASE page_item.lp_flags
-        WHEN 0 THEN 'unused (0)'
-        WHEN 1 THEN 'normal (1)'
-        WHEN 2 THEN 'redirect (2)'
-        WHEN 3 THEN 'dead (3)'
-        ELSE 'unknown (' || page_item.lp_flags::text || ')'
-      END;
-
-      RETURN QUERY SELECT
-        position,
-        current_ctid,
-        flags_name,
-        page_item.t_ctid,
-        (page_item.t_ctid IS NULL OR page_item.t_ctid = current_ctid)::boolean
-      ;
-
-      IF page_item.t_ctid IS NULL OR page_item.t_ctid = current_ctid THEN
-        RETURN;
-      END IF;
-
-      next_ctid := page_item.t_ctid;
-
-      IF (next_ctid::text::point)[0]::int != block_num THEN
-        RETURN;
-      END IF;
-
-      current_ctid := next_ctid;
-      position := position + 1;
+        current_ctid := ('(' || block_num::text || ',' || page_item.lp::text || ')')::tid;
+        found_predecessor := true;
+        EXIT;
     END LOOP;
-
-    IF position = 0 THEN
-      RETURN;
+    IF NOT found_predecessor THEN
+        current_ctid := start_ctid;
     END IF;
-  END LOOP;
+
+    WHILE position < max_iterations LOOP
+        line_ptr := (current_ctid::text::point)[1]::int;
+        FOR page_item IN
+            SELECT lp, lp_flags, t_ctid
+            FROM heap_page_items(get_raw_page(rel_name, block_num))
+            WHERE lp = line_ptr
+        LOOP
+            flags_name := CASE page_item.lp_flags
+                WHEN 0 THEN 'unused (0)'
+                WHEN 1 THEN 'normal (1)'
+                WHEN 2 THEN 'redirect (2)'
+                WHEN 3 THEN 'dead (3)'
+                ELSE 'unknown (' || page_item.lp_flags::text || ')'
+            END;
+            RETURN QUERY SELECT
+                position,
+                current_ctid,
+                flags_name,
+                page_item.t_ctid,
+                (page_item.t_ctid IS NULL OR page_item.t_ctid = current_ctid)::boolean;
+
+            IF page_item.t_ctid IS NULL OR page_item.t_ctid = current_ctid THEN
+                RETURN;
+            END IF;
+            next_ctid := page_item.t_ctid;
+            IF (next_ctid::text::point)[0]::int != block_num THEN
+                RETURN;
+            END IF;
+            current_ctid := next_ctid;
+            position := position + 1;
+        END LOOP;
+        IF position = 0 THEN
+            RETURN;
+        END IF;
+    END LOOP;
 END;
 $$ LANGUAGE plpgsql;
 
--- Basic HOT update (update non-indexed column)
+
+-- ---------------------------------------------------------------------------
+-- 1. Basic HOT: update of a non-indexed column
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test (
     id int PRIMARY KEY,
     indexed_col int,
     non_indexed_col text
 ) WITH (fillfactor = 50);
-
 CREATE INDEX hot_test_indexed_idx ON hot_test(indexed_col);
 
 INSERT INTO hot_test VALUES (1, 100, 'initial');
 INSERT INTO hot_test VALUES (2, 200, 'initial');
 INSERT INTO hot_test VALUES (3, 300, 'initial');
 
--- Get baseline
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_test');
 
--- Should be HOT updates (only non-indexed column modified)
+-- Three classic HOT updates (non-indexed col).
 UPDATE hot_test SET non_indexed_col = 'updated1' WHERE id = 1;
 UPDATE hot_test SET non_indexed_col = 'updated2' WHERE id = 2;
 UPDATE hot_test SET non_indexed_col = 'updated3' WHERE id = 3;
-
--- Verify HOT updates occurred
-SELECT * FROM get_hot_count('hot_test');
-
--- Dump the HOT chain before VACUUMing
-WITH current_tuple AS (
-  SELECT ctid FROM hot_test WHERE id = 1
-)
-SELECT
-  has_hot_chain('hot_test', current_tuple.ctid) AS has_chain,
-  chain_position,
-  print_hot_chain.ctid,
-  lp_flags,
-  t_ctid
-FROM current_tuple,
-LATERAL print_hot_chain('hot_test', current_tuple.ctid);
-
--- Vacuum the relation, expect the HOT chain to collapse
-VACUUM hot_test;
-
--- Show that there is no chain after vacuum
-WITH current_tuple AS (
-  SELECT ctid FROM hot_test WHERE id = 1
-)
-SELECT
-  has_hot_chain('hot_test', current_tuple.ctid) AS has_chain,
-  chain_position,
-  print_hot_chain.ctid,
-  lp_flags,
-  t_ctid
-FROM current_tuple,
-LATERAL print_hot_chain('hot_test', current_tuple.ctid);
-
--- Non-HOT update (update indexed column)
-UPDATE hot_test SET indexed_col = 150 WHERE id = 1;
-SELECT * FROM get_hot_count('hot_test');
-
--- Verify index was updated (new value findable)
-SET enable_seqscan = off;
-EXPLAIN (COSTS OFF) SELECT id, indexed_col FROM hot_test WHERE indexed_col = 150;
-SELECT id, indexed_col FROM hot_test WHERE indexed_col = 150;
-
--- Verify old value no longer in index
-EXPLAIN (COSTS OFF) SELECT id FROM hot_test WHERE indexed_col = 100;
-SELECT id FROM hot_test WHERE indexed_col = 100;
-RESET enable_seqscan;
-
--- All-or-none property: updating one indexed column requires ALL index updates
-DROP TABLE hot_test;
-
-CREATE TABLE hot_test (
-    id int PRIMARY KEY,
-    col_a int,
-    col_b int,
-    col_c int,
-    non_indexed text
-) WITH (fillfactor = 50);
-
-CREATE INDEX hot_test_a_idx ON hot_test(col_a);
-CREATE INDEX hot_test_b_idx ON hot_test(col_b);
-CREATE INDEX hot_test_c_idx ON hot_test(col_c);
-
-INSERT INTO hot_test VALUES (1, 10, 20, 30, 'initial');
-
--- Update only col_a - should NOT be HOT because an indexed column changed
--- This means ALL indexes must be updated (all-or-none property)
-UPDATE hot_test SET col_a = 15 WHERE id = 1;
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_test');
 
--- Now update only non-indexed column - should be HOT
-UPDATE hot_test SET non_indexed = 'updated';
-SELECT * FROM get_hot_count('hot_test');
+-- Chain-of-1 on id=1 still has a predecessor line pointer.
+WITH current_tuple AS (SELECT ctid FROM hot_test WHERE id = 1)
+SELECT has_hot_chain('hot_test', current_tuple.ctid) AS has_chain,
+       chain_position, print_hot_chain.ctid, lp_flags, t_ctid
+FROM current_tuple, LATERAL print_hot_chain('hot_test', current_tuple.ctid);
 
--- Partial index: both old and new outside predicate (conservative = non-HOT)
-DROP TABLE hot_test;
-
-CREATE TABLE hot_test (
-    id int PRIMARY KEY,
-    status text,
-    data text
-) WITH (fillfactor = 50);
-
--- Partial index only covers status = 'active'
-CREATE INDEX hot_test_active_idx ON hot_test(status) WHERE status = 'active';
-
-INSERT INTO hot_test VALUES (1, 'active', 'data1');
-INSERT INTO hot_test VALUES (2, 'inactive', 'data2');
-INSERT INTO hot_test VALUES (3, 'deleted', 'data3');
-
--- Update non-indexed column on 'active' row (in predicate, status unchanged)
--- Should be HOT
-UPDATE hot_test SET data = 'updated1' WHERE id = 1;
-SELECT * FROM get_hot_count('hot_test');
-
--- Update non-indexed column on 'inactive' row (outside predicate)
--- Should be HOT
-UPDATE hot_test SET data = 'updated2' WHERE id = 2;
-SELECT * FROM get_hot_count('hot_test');
+-- VACUUM collapses the chain.
+VACUUM hot_test;
 
--- Update status from 'inactive' to 'deleted' (both outside predicate)
--- PostgreSQL is conservative: heap insert happens before predicate check
--- So this is NON-HOT even though both values are outside predicate
-UPDATE hot_test SET status = 'deleted' WHERE id = 2;
-SELECT * FROM get_hot_count('hot_test');
+WITH current_tuple AS (SELECT ctid FROM hot_test WHERE id = 1)
+SELECT has_hot_chain('hot_test', current_tuple.ctid) AS has_chain,
+       chain_position, print_hot_chain.ctid, lp_flags, t_ctid
+FROM current_tuple, LATERAL print_hot_chain('hot_test', current_tuple.ctid);
 
--- Verify index still works for 'active' rows
-SELECT id, status FROM hot_test WHERE status = 'active';
-
--- Only BRIN (summarizing) indexes on non-PK columns
 DROP TABLE hot_test;
 
+-- ---------------------------------------------------------------------------
+-- 2. Summarizing indexes (BRIN) do not block HOT
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test (
     id int PRIMARY KEY,
     ts timestamp,
     value int,
     brin_col int
 ) WITH (fillfactor = 50);
-
 CREATE INDEX hot_test_ts_brin ON hot_test USING brin(ts);
 CREATE INDEX hot_test_brin_col_brin ON hot_test USING brin(brin_col);
 
 INSERT INTO hot_test VALUES (1, '2024-01-01', 100, 1000);
 
--- Update both BRIN columns - should still be HOT (only summarizing indexes)
+-- BRIN columns are summarizing; updating them stays classic HOT even
+-- though their values change.
 UPDATE hot_test SET ts = '2024-01-02', brin_col = 2000 WHERE id = 1;
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_test');
 
--- Update non-indexed column - should also be HOT
+-- Non-indexed column: also HOT.
 UPDATE hot_test SET value = 200 WHERE id = 1;
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_test');
 
--- TOAST and HOT: TOASTed columns can participate in HOT
 DROP TABLE hot_test;
 
+-- ---------------------------------------------------------------------------
+-- 3. TOAST participates in HOT (non-indexed column paths only)
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test (
     id int PRIMARY KEY,
     indexed_col int,
     large_text text,
     small_text text
 ) WITH (fillfactor = 50);
-
 CREATE INDEX hot_test_idx ON hot_test(indexed_col);
 
--- Insert row with TOASTed column (> 2KB)
 INSERT INTO hot_test VALUES (1, 100, repeat('x', 3000), 'small');
 
--- Update non-indexed, non-TOASTed column - should be HOT
+-- Non-indexed, non-TOAST column: HOT.
 UPDATE hot_test SET small_text = 'updated';
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_test');
 
--- Update TOASTed column - should be HOT if indexed column unchanged
+-- TOAST column, indexed_col unchanged: HOT.
 UPDATE hot_test SET large_text = repeat('y', 3000);
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_test');
 
--- Update indexed column - should NOT be HOT
-UPDATE hot_test SET indexed_col = 200;
-SELECT * FROM get_hot_count('hot_test');
-
--- Unique constraint (unique index) behaves like regular index
 DROP TABLE hot_test;
 
+-- ---------------------------------------------------------------------------
+-- 4. Partial index where update leaves indexed attrs unchanged
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test (
     id int PRIMARY KEY,
-    unique_col int UNIQUE,
+    status text,
     data text
 ) WITH (fillfactor = 50);
+CREATE INDEX hot_test_active_idx ON hot_test(status) WHERE status = 'active';
 
-INSERT INTO hot_test VALUES (1, 100, 'data1');
-INSERT INTO hot_test VALUES (2, 200, 'data2');
+INSERT INTO hot_test VALUES (1, 'active', 'data1');
+INSERT INTO hot_test VALUES (2, 'inactive', 'data2');
+INSERT INTO hot_test VALUES (3, 'deleted', 'data3');
 
--- Update data (non-indexed) - should be HOT
-UPDATE hot_test SET data = 'updated';
+-- Update data on a row whose status matches the partial predicate: HOT.
+UPDATE hot_test SET data = 'updated1' WHERE id = 1;
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_test');
 
--- Verify unique constraint still enforced
-SELECT id, unique_col, data FROM hot_test ORDER BY id;
+-- Update data on a row outside the predicate: HOT.
+UPDATE hot_test SET data = 'updated2' WHERE id = 2;
+SELECT pg_stat_force_next_flush();
+SELECT * FROM get_hot_count('hot_test');
 
--- This should fail (unique violation)
-UPDATE hot_test SET unique_col = 100 WHERE id = 2;
+SELECT id, status FROM hot_test WHERE status = 'active';
 
--- Multi-column index: any column change = non-HOT
 DROP TABLE hot_test;
 
+-- ---------------------------------------------------------------------------
+-- 5. Multi-column btree: update of non-indexed column
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test (
     id int PRIMARY KEY,
     col_a int,
@@ -356,36 +260,45 @@ CREATE TABLE hot_test (
     col_c int,
     data text
 ) WITH (fillfactor = 50);
-
 CREATE INDEX hot_test_ab_idx ON hot_test(col_a, col_b);
 
 INSERT INTO hot_test VALUES (1, 10, 20, 30, 'data');
 
--- Update col_a (part of multi-column index) - should NOT be HOT
-UPDATE hot_test SET col_a = 15;
+-- col_c not in any index: HOT.
+UPDATE hot_test SET col_c = 35;
+-- data not in any index: HOT.
+UPDATE hot_test SET data = 'updated';
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_test');
 
--- Reset
-UPDATE hot_test SET col_a = 10;
-
--- Update col_b (part of multi-column index) - should NOT be HOT
-UPDATE hot_test SET col_b = 25;
-SELECT * FROM get_hot_count('hot_test');
+DROP TABLE hot_test;
 
--- Reset
-UPDATE hot_test SET col_b = 20;
-SELECT * FROM get_hot_count('hot_test');
+-- ---------------------------------------------------------------------------
+-- 6. Unique index: update of non-indexed column + uniqueness enforcement
+-- ---------------------------------------------------------------------------
+CREATE TABLE hot_test (
+    id int PRIMARY KEY,
+    unique_col int UNIQUE,
+    data text
+) WITH (fillfactor = 50);
 
--- Update col_c (not indexed) - should be HOT
-UPDATE hot_test SET col_c = 35;
+INSERT INTO hot_test VALUES (1, 100, 'data1');
+INSERT INTO hot_test VALUES (2, 200, 'data2');
 
--- Update data (not indexed) - should be HOT
 UPDATE hot_test SET data = 'updated';
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_test');
 
--- Partitioned tables: HOT works within partitions
-DROP TABLE IF EXISTS hot_test_partitioned CASCADE;
+SELECT id, unique_col, data FROM hot_test ORDER BY id;
 
+-- Unique constraint still enforced on any path.
+UPDATE hot_test SET unique_col = 100 WHERE id = 2;
+
+DROP TABLE hot_test;
+
+-- ---------------------------------------------------------------------------
+-- 7. Partitioned tables: HOT within a partition
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_test_partitioned (
     id int,
     partition_key int,
@@ -404,202 +317,82 @@ CREATE INDEX hot_test_part_idx ON hot_test_partitioned(indexed_col);
 INSERT INTO hot_test_partitioned VALUES (1, 50, 100, 'initial1');
 INSERT INTO hot_test_partitioned VALUES (2, 150, 200, 'initial2');
 
--- Update in partition 1 (non-indexed column) - should be HOT
 UPDATE hot_test_partitioned SET data = 'updated1' WHERE id = 1;
-
--- Update in partition 2 (non-indexed column) - should be HOT
 UPDATE hot_test_partitioned SET data = 'updated2' WHERE id = 2;
 
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_test_part1');
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_test_part2');
 
--- Verify indexes work on partitions
 SELECT id FROM hot_test_partitioned WHERE indexed_col = 100;
 SELECT id FROM hot_test_partitioned WHERE indexed_col = 200;
 
--- Update indexed column in partition - should NOT be HOT
-UPDATE hot_test_partitioned SET indexed_col = 150 WHERE id = 1;
-SELECT * FROM get_hot_count('hot_test_part1');
-
--- Verify index was updated
-SELECT id FROM hot_test_partitioned WHERE indexed_col = 150;
-
--- ============================================================================
--- Trigger modifications: heap_modify_tuple() and HOT
--- ============================================================================
--- Test that we correctly detect when triggers modify indexed columns via
--- heap_modify_tuple(), even when those columns aren't in the UPDATE's SET clause
-
-CREATE TABLE hot_trigger_test (
-    id int PRIMARY KEY,
-    triggered_col int,
-    data text
-) WITH (fillfactor = 50);
-
-CREATE INDEX hot_trigger_idx ON hot_trigger_test(triggered_col);
-
--- Create a trigger that modifies an indexed column
-CREATE OR REPLACE FUNCTION modify_triggered_col()
-RETURNS TRIGGER AS $$
-BEGIN
-    NEW.triggered_col = NEW.triggered_col + 1;
-    RETURN NEW;
-END;
-$$ LANGUAGE plpgsql;
-
-CREATE TRIGGER before_update_modify
-    BEFORE UPDATE ON hot_trigger_test
-    FOR EACH ROW
-    EXECUTE FUNCTION modify_triggered_col();
-
-INSERT INTO hot_trigger_test VALUES (1, 100, 'initial');
-
-SELECT * FROM get_hot_count('hot_trigger_test');
-
--- Update only data column, but trigger modifies indexed column
--- Should NOT be HOT because trigger modified an indexed column
-UPDATE hot_trigger_test SET data = 'updated' WHERE id = 1;
-
--- Verify it was NOT a HOT update (indexed column was modified by trigger)
-SELECT * FROM get_hot_count('hot_trigger_test');
-
--- Verify the triggered column was actually modified
-SELECT triggered_col FROM hot_trigger_test WHERE id = 1;
-
-DROP TABLE hot_trigger_test CASCADE;
-DROP FUNCTION modify_triggered_col();
-
--- ============================================================================
--- JSONB expression indexes and sub-attribute tracking
--- ============================================================================
--- Test that updates to non-indexed JSONB paths can be HOT updates
+DROP TABLE hot_test_partitioned CASCADE;
 
+-- ---------------------------------------------------------------------------
+-- 8. JSONB expression index: any change to the input column is non-HOT
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_jsonb_test (
     id int PRIMARY KEY,
     data jsonb
 ) WITH (fillfactor = 50);
-
--- Create expression index on a specific JSON path
 CREATE INDEX hot_jsonb_name_idx ON hot_jsonb_test ((data->>'name'));
 
 INSERT INTO hot_jsonb_test VALUES
     (1, '{"name":"Alice","age":30,"city":"NYC"}'),
     (2, '{"name":"Bob","age":25,"city":"LA"}');
 
-SELECT * FROM get_hot_count('hot_jsonb_test');
-
--- Update non-indexed JSON path (age) - should be HOT after instrumentation
+-- The jsonb column is the expression index's input, so HOT-indexed is
+-- disqualified (expression indexes are not yet supported) and the jsonb
+-- change blocks classic HOT: non-HOT update.
 UPDATE hot_jsonb_test SET data = jsonb_set(data, '{age}', '31') WHERE id = 1;
-
-SELECT * FROM get_hot_count('hot_jsonb_test');
-
--- Update indexed JSON path (name) - should NOT be HOT
-UPDATE hot_jsonb_test SET data = jsonb_set(data, '{name}', '"Alice2"') WHERE id = 1;
-
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_jsonb_test');
 
--- Verify index works
-SELECT id FROM hot_jsonb_test WHERE data->>'name' = 'Alice2';
-
--- Test jsonb_delete on non-indexed path - should be HOT after instrumentation
+-- Likewise non-HOT: expression index disqualifies HOT-indexed.
 UPDATE hot_jsonb_test SET data = data - 'city' WHERE id = 2;
-
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_jsonb_test');
 
--- Test jsonb_insert on non-indexed path - should be HOT after instrumentation
+-- Likewise non-HOT: expression index disqualifies HOT-indexed.
 UPDATE hot_jsonb_test SET data = jsonb_insert(data, '{country}', '"USA"') WHERE id = 2;
-
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_jsonb_test');
 
 DROP TABLE hot_jsonb_test;
 
--- ============================================================================
--- XML expression indexes and sub-attribute tracking
--- ============================================================================
--- Test that updates to non-indexed XML paths can be HOT updates
-
-CREATE TABLE hot_xml_test (
-    id int PRIMARY KEY,
-    doc xml
-) WITH (fillfactor = 50);
-
--- Create expression index on a specific XPath
-CREATE INDEX hot_xml_name_idx ON hot_xml_test ((xpath('/person/name/text()', doc)));
-
-INSERT INTO hot_xml_test VALUES
-    (1, '<person><name>Alice</name><age>30</age></person>'),
-    (2, '<person><name>Bob</name><age>25</age></person>');
-
-SELECT * FROM get_hot_count('hot_xml_test');
-
--- Update non-indexed XPath (age) - behavior depends on XML comparison fallback
--- Full XML value replacement means non-indexed path updates still require index comparison
-UPDATE hot_xml_test SET doc = '<person><name>Alice</name><age>31</age></person>' WHERE id = 1;
-
-SELECT * FROM get_hot_count('hot_xml_test');
-
--- Update indexed XPath (name) - should NOT be HOT
-UPDATE hot_xml_test SET doc = '<person><name>Alice2</name><age>31</age></person>' WHERE id = 1;
-
-SELECT * FROM get_hot_count('hot_xml_test');
-
--- Verify index works
-SELECT id FROM hot_xml_test WHERE xpath('/person/name/text()', doc) = ARRAY['Alice2'::text];
-
-DROP TABLE hot_xml_test;
-
--- ============================================================================
--- GIN indexes and amcomparedatums for JSONB
--- ============================================================================
--- Test that GIN indexes can use amcomparedatums to enable HOT when extracted keys match
-
+-- ---------------------------------------------------------------------------
+-- 9. A change to a GIN-indexed column is HOT-indexed
+--
+-- The read side filters a stale leaf via the crossed-attribute bitmap, which
+-- is access-method agnostic, so a GIN-covered column is HOT-indexed like any
+-- other: only the GIN index is maintained, and a GIN scan (which rechecks on
+-- the heap) returns correct results across the chain.
+-- ---------------------------------------------------------------------------
 CREATE TABLE hot_gin_test (
     id int PRIMARY KEY,
     tags text[],
     properties jsonb
 ) WITH (fillfactor = 50);
-
--- GIN index on text array
 CREATE INDEX hot_gin_tags_idx ON hot_gin_test USING gin (tags);
-
--- GIN index on JSONB (jsonb_ops - keys and values)
 CREATE INDEX hot_gin_props_idx ON hot_gin_test USING gin (properties);
 
 INSERT INTO hot_gin_test VALUES
     (1, ARRAY['tag1', 'tag2'], '{"key1":"val1","key2":"val2"}'),
     (2, ARRAY['tag3', 'tag4'], '{"key3":"val3","key4":"val4"}');
 
-SELECT * FROM get_hot_count('hot_gin_test');
-
--- Update that changes tag order but not content - after amcomparedatums should be HOT
--- (GIN extracts same keys, just different order)
+-- Reorder tags: a GIN-covered column changes, so this is HOT-indexed.
 UPDATE hot_gin_test SET tags = ARRAY['tag2', 'tag1'] WHERE id = 1;
-
-SELECT * FROM get_hot_count('hot_gin_test');
-
--- Update JSONB value (not key) - after amcomparedatums may be HOT or non-HOT
--- depending on GIN operator class (jsonb_ops indexes both keys and values)
-UPDATE hot_gin_test SET properties = '{"key1":"val1_new","key2":"val2"}' WHERE id = 1;
-
+SELECT pg_stat_force_next_flush();
 SELECT * FROM get_hot_count('hot_gin_test');
 
--- Add new tag - should NOT be HOT (different extracted keys)
-UPDATE hot_gin_test SET tags = ARRAY['tag2', 'tag1', 'tag5'] WHERE id = 1;
-
-SELECT * FROM get_hot_count('hot_gin_test');
-
--- Verify GIN indexes work
-SELECT id FROM hot_gin_test WHERE tags @> ARRAY['tag5'];
-SELECT id FROM hot_gin_test WHERE properties @> '{"key1":"val1_new"}';
-
 DROP TABLE hot_gin_test;
 
--- ============================================================================
+-- ---------------------------------------------------------------------------
 -- Cleanup
--- ============================================================================
-DROP TABLE IF EXISTS hot_test;
-DROP TABLE IF EXISTS hot_test_partitioned CASCADE;
-DROP FUNCTION IF EXISTS has_hot_chain(text, tid);
-DROP FUNCTION IF EXISTS print_hot_chain(text, tid);
-DROP FUNCTION IF EXISTS get_hot_count(text);
+-- ---------------------------------------------------------------------------
+DROP FUNCTION has_hot_chain(text, tid);
+DROP FUNCTION print_hot_chain(text, tid);
+DROP FUNCTION get_hot_count(text);
 DROP EXTENSION pageinspect;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 3a2720fb5f9..cf32612edaa 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1274,6 +1274,7 @@ HeapTupleFreeze
 HeapTupleHeader
 HeapTupleHeaderData
 HeapTupleTableSlot
+HeapUpdateIndexMode
 HistControl
 HostCacheEntry
 HostsFileLoadResult
@@ -3135,7 +3136,6 @@ TSVectorStat
 TState
 TStatus
 TStoreState
-TU_UpdateIndexes
 TXNEntryFile
 TYPCATEGORY
 T_Action
-- 
2.50.1