From: Noah Misch

diff --git a/src/backend/access/heap/README.tuplock b/src/backend/access/heap/README.tuplock
index 843c2e5..16f7d78 100644
--- a/src/backend/access/heap/README.tuplock
+++ b/src/backend/access/heap/README.tuplock
@@ -199,3 +199,35 @@ under a reader holding a pin.
 A reader of a heap_fetch() result tuple may witness a torn read.  Current
 inplace-updated fields are aligned and are no wider than four bytes, and
 current readers don't need consistency across fields.  Hence, they get by
 with just fetching each field once.
+
+During logical decoding, caches reflect an inplace update no later than the
+next XLOG_XACT_INVALIDATIONS.  That record witnesses the end of a command.
+Tuples of its cmin are then visible to decoding, as are inplace updates of any
+lower LSN.  Inplace updates of a higher LSN may also be visible, even if those
+updates would have been invisible to a non-historic snapshot matching
+decoding's historic snapshot.  (In other words, decoding may see inplace
+updates that were not visible to a similar snapshot taken during original
+transaction processing.)  That's a consequence of inplace update violating
+MVCC: there are no snapshot-specific versions of inplace-updated values.  This
+all makes it hard to reason about inplace-updated column reads during logical
+decoding, but the behavior does suffice for relhasindex.  A relhasindex=t in
+CREATE INDEX becomes visible no later than the new pg_index row.  While it may
+be visible earlier, that's harmless.  Finding zero indexes despite
+relhasindex=t is normal in more cases than this, e.g. after DROP INDEX.
+Example of a case that meaningfully reacts to the inplace inval:
+
+CREATE TABLE cat (c int) WITH (user_catalog_table = true);
+CREATE TABLE normal (d int);
+...
+CREATE INDEX ON cat (c)\; INSERT INTO normal VALUES (1);
+
+If the output plugin reads "cat" during decoding of the INSERT, it's fair to
+want that read to see relhasindex=t and use the new index.
+
+An alternative would be to have decoding of XLOG_HEAP_INPLACE immediately
+execute its invals.  That would behave more like invals during original
+transaction processing.  It would remove the decoding-specific delay in e.g. a
+decoding plugin witnessing a relfrozenxid change.  However, a good use case
+for that is unlikely, since the plugin would still witness relfrozenxid
+changes prematurely.  Hence, inplace update takes the trivial approach of
+delegating to XLOG_XACT_INVALIDATIONS.
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 4d382a0..7858691 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6349,10 +6349,13 @@ heap_abort_speculative(Relation relation, const ItemPointerData *tid)
  * Since this is intended for system catalogs and SERIALIZABLE doesn't cover
  * DDL, this doesn't guarantee any particular predicate locking.
  *
- * One could modify this to return true for tuples with delete in progress,
- * All inplace updaters take a lock that conflicts with DROP.  If explicit
- * "DELETE FROM pg_class" is in progress, we'll wait for it like we would an
- * update.
+ * heap_delete() is a rarer source of blocking transactions (xwait).  We'll
+ * wait for such a transaction just like for the normal heap_update() case.
+ * Normal concurrent DROP commands won't cause that, because all inplace
+ * updaters take some lock that conflicts with DROP.  An explicit SQL "DELETE
+ * FROM pg_class" can cause it.  By waiting, if the concurrent transaction
+ * executed both "DELETE FROM pg_class" and "INSERT INTO pg_class", our caller
+ * can find the successor tuple.
  *
  * Readers of inplace-updated fields expect changes to those fields are
  * durable.  For example, vac_truncate_clog() reads datfrozenxid from
@@ -6393,15 +6396,17 @@ heap_inplace_lock(Relation relation,
 	Assert(BufferIsValid(buffer));
 
 	/*
-	 * Construct shared cache inval if necessary.  Because we pass a tuple
-	 * version without our own inplace changes or inplace changes other
-	 * sessions complete while we wait for locks, inplace update mustn't
-	 * change catcache lookup keys.  But we aren't bothering with index
-	 * updates either, so that's true a fortiori.  After LockBuffer(), it
-	 * would be too late, because this might reach a
-	 * CatalogCacheInitializeCache() that locks "buffer".
+	 * Register shared cache invals if necessary.  Other sessions may finish
+	 * inplace updates of this tuple between this step and LockTuple().  Since
+	 * inplace updates don't change cache keys, that's harmless.
+	 *
+	 * While it's tempting to register invals only after confirming we can
+	 * return true, the following obstacle precludes reordering steps that
+	 * way.  Registering invals might reach a CatalogCacheInitializeCache()
+	 * that locks "buffer".  That would hang indefinitely if running after our
+	 * own LockBuffer().  Hence, we must register invals before LockBuffer().
 	 */
-	CacheInvalidateHeapTupleInplace(relation, oldtup_ptr, NULL);
+	CacheInvalidateHeapTupleInplace(relation, oldtup_ptr);
 
 	LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
 	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -6639,10 +6644,6 @@ heap_inplace_update_and_unlock(Relation relation,
 	/*
 	 * Send invalidations to shared queue.  SearchSysCacheLocked1() assumes we
 	 * do this before UnlockTuple().
-	 *
-	 * If we're mutating a tuple visible only to this transaction, there's an
-	 * equivalent transactional inval from the action that created the tuple,
-	 * and this inval is superfluous.
 	 */
 	AtInplace_Inval();
 
@@ -6653,10 +6654,10 @@
 	AcceptInvalidationMessages();	/* local processing of just-sent inval */
 
 	/*
-	 * Queue a transactional inval.  The immediate invalidation we just sent
-	 * is the only one known to be necessary.  To reduce risk from the
-	 * transition to immediate invalidation, continue sending a transactional
-	 * invalidation like we've long done.  Third-party code might rely on it.
+	 * Queue a transactional inval, for logical decoding and for third-party
+	 * code that might have been relying on it since long before inplace
+	 * update adopted immediate invalidation.  See README.tuplock section
+	 * "Reading inplace-updated columns" for logical decoding details.
 	 */
 	if (!IsBootstrapProcessingMode())
 		CacheInvalidateHeapTuple(relation, tuple, NULL);
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index c969170..5f7b0ca 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -781,10 +781,11 @@ systable_endscan_ordered(SysScanDesc sysscan)
  * systable_inplace_update_begin --- update a row "in place" (overwrite it)
  *
  * Overwriting violates both MVCC and transactional safety, so the uses of
- * this function in Postgres are extremely limited.  Nonetheless we find some
- * places to use it.  See README.tuplock section "Locking to write
- * inplace-updated tables" and later sections for expectations of readers and
- * writers of a table that gets inplace updates.  Standard flow:
+ * this function in Postgres are extremely limited.  This makes no effort to
+ * support updating cache key columns or other indexed columns.  Nonetheless
+ * we find some places to use it.  See README.tuplock section "Locking to
+ * write inplace-updated tables" and later sections for expectations of
+ * readers and writers of a table that gets inplace updates.  Standard flow:
  *
  * ... [any slow preparation not requiring oldtup] ...
  * systable_inplace_update_begin([...], &tup, &inplace_state);
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index cc03f07..5e15cb1 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -521,18 +521,9 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 
 			/*
 			 * Inplace updates are only ever performed on catalog tuples and
-			 * can, per definition, not change tuple visibility.  Inplace
-			 * updates don't affect storage or interpretation of table rows,
-			 * so they don't affect logicalrep_write_tuple() outcomes.  Hence,
-			 * we don't process invalidations from the original operation.  If
-			 * inplace updates did affect those things, invalidations wouldn't
-			 * make it work, since there are no snapshot-specific versions of
-			 * inplace-updated values.  Since we also don't decode catalog
-			 * tuples, we're not interested in the record's contents.
-			 *
-			 * WAL contains likely-unnecessary commit-time invals from the
-			 * CacheInvalidateHeapTuple() call in
-			 * heap_inplace_update_and_unlock().  Excess invalidation is safe.
+			 * can, per definition, not change tuple visibility.  Since we
+			 * also don't decode catalog tuples, we're not interested in the
+			 * record's contents.
 			 */
 			break;
 
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 06f736c..f75c247 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -1583,13 +1583,17 @@ CacheInvalidateHeapTuple(Relation relation,
  * implied.
  *
  * Like CacheInvalidateHeapTuple(), but for inplace updates.
+ *
+ * Just before and just after the inplace update, the tuple's cache keys must
+ * match those in key_equivalent_tuple.  Cache keys consist of catcache lookup
+ * key columns and columns referencing pg_class.oid values,
+ * e.g. pg_constraint.conrelid, which would trigger relcache inval.
  */
 void
 CacheInvalidateHeapTupleInplace(Relation relation,
-                                HeapTuple tuple,
-                                HeapTuple newtuple)
+                                HeapTuple key_equivalent_tuple)
 {
-	CacheInvalidateHeapTupleCommon(relation, tuple, newtuple,
+	CacheInvalidateHeapTupleCommon(relation, key_equivalent_tuple, NULL,
 	                               PrepareInplaceInvalidationState);
 }
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index af46625..345733d 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -61,8 +61,7 @@
 extern void CacheInvalidateHeapTuple(Relation relation,
                                      HeapTuple tuple,
                                      HeapTuple newtuple);
 extern void CacheInvalidateHeapTupleInplace(Relation relation,
-                                            HeapTuple tuple,
-                                            HeapTuple newtuple);
+                                            HeapTuple key_equivalent_tuple);
 
 extern void CacheInvalidateCatalog(Oid catalogId);
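
For anyone who wants to see the README's "cat" example in action, a minimal
sketch using the in-tree test_decoding plugin could look like this, run from
psql (assumes wal_level=logical and a free replication slot; the slot name
'inplace_slot' is arbitrary):

  -- create the slot first, so the later commands fall in its decoding window
  SELECT pg_create_logical_replication_slot('inplace_slot', 'test_decoding');
  CREATE TABLE cat (c int) WITH (user_catalog_table = true);
  CREATE TABLE normal (d int);
  CREATE INDEX ON cat (c)\; INSERT INTO normal VALUES (1);
  -- decode everything since slot creation, then clean up
  SELECT data FROM pg_logical_slot_get_changes('inplace_slot', NULL, NULL);
  SELECT pg_drop_replication_slot('inplace_slot');

test_decoding itself never reads "cat", so this only drives the decoding path
described above; observing the relhasindex read directly would take an output
plugin that queries the user catalog table.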