From f8f68c999dbd52ed566a9953b35292a1218015fe Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Date: Fri, 3 Jul 2026 14:46:42 +0000
Subject: [PATCH v3 4/4] Add invalidation-based retry loop for AlterPublication

Apply the same RangeVarGetRelidExtended() style retry loop to
AlterPublication()'s tables/schemas branch that was added for subscriptions
in commit XXXX.

Previously, this branch resolved the publication name and checked ownership
at the top of AlterPublication(), then locked and re-read by OID. This left a
window where concurrent DDL could have modified the ownership and/or the name
resolution

Now the tables/schemas branch has its own complete retry loop: name
resolution, ownership check, and lock acquisition all inside the loop.

Author: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by:
Discussion: https://postgr.es/m/akZUpiDa1UfmzYxL%40bdtpg
---
 src/backend/commands/publicationcmds.c | 105 +++++++++++++++++--------
 1 file changed, 72 insertions(+), 33 deletions(-)
 100.0% src/backend/commands/

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 440adb356ad..dfd707bc7d7 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -39,6 +39,7 @@
 #include "parser/parse_relation.h"
 #include "rewrite/rewriteHandler.h"
 #include "storage/lmgr.h"
+#include "storage/sinval.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/inval.h"
@@ -1662,54 +1663,92 @@ AlterPublication(ParseState *pstate, AlterPublicationStmt *stmt)
 
 	rel = table_open(PublicationRelationId, RowExclusiveLock);
 
-	tup = SearchSysCacheCopy1(PUBLICATIONNAME,
-							  CStringGetDatum(stmt->pubname));
+	if (stmt->options)
+	{
+		tup = SearchSysCacheCopy1(PUBLICATIONNAME,
+								  CStringGetDatum(stmt->pubname));
 
-	if (!HeapTupleIsValid(tup))
-		ereport(ERROR,
-				(errcode(ERRCODE_UNDEFINED_OBJECT),
-				 errmsg("publication \"%s\" does not exist",
-						stmt->pubname)));
+		if (!HeapTupleIsValid(tup))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_OBJECT),
+					 errmsg("publication \"%s\" does not exist",
+							stmt->pubname)));
 
-	pubform = (Form_pg_publication) GETSTRUCT(tup);
+		pubform = (Form_pg_publication) GETSTRUCT(tup);
 
-	/* must be owner */
-	if (!object_ownercheck(PublicationRelationId, pubform->oid, GetUserId()))
-		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_PUBLICATION,
-					   stmt->pubname);
+		/* must be owner */
+		if (!object_ownercheck(PublicationRelationId, pubform->oid,
+							   GetUserId()))
+			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_PUBLICATION,
+						   stmt->pubname);
 
-	if (stmt->options)
 		AlterPublicationOptions(pstate, stmt, rel, tup);
+	}
 	else
 	{
 		List	   *relations = NIL;
 		List	   *exceptrelations = NIL;
 		List	   *schemaidlist = NIL;
-		Oid			pubid = pubform->oid;
+		Oid			pubid;
 
-		ObjectsInPublicationToOids(stmt->pubobjects, pstate, &relations,
-								   &exceptrelations, &schemaidlist);
+		/*
+		 * Lock the publication so nobody else can do anything with it.
+		 *
+		 * Like RangeVarGetRelidExtended() does for relations, we resolve the
+		 * name, check ownership, and lock inside a loop. If invalidation
+		 * messages arrive (indicating concurrent DDL), we retry. We keep the
+		 * lock held across retries and only release it if the name resolves
+		 * to a different OID on the next iteration.
+		 */
+		{
+			Oid			oldPubId = InvalidOid;
+			bool		retry = false;
 
-		CheckAlterPublication(stmt, tup, relations, schemaidlist);
+			for (;;)
+			{
+				uint64		inval_count = SharedInvalidMessageCounter;
 
-		heap_freetuple(tup);
+				tup = SearchSysCacheCopy1(PUBLICATIONNAME,
+										  CStringGetDatum(stmt->pubname));
 
-		/* Lock the publication so nobody else can do anything with it. */
-		LockDatabaseObject(PublicationRelationId, pubid, 0,
-						   AccessExclusiveLock);
+				if (!HeapTupleIsValid(tup))
+					ereport(ERROR,
+							(errcode(ERRCODE_UNDEFINED_OBJECT),
+							 errmsg("publication \"%s\" does not exist",
+									stmt->pubname)));
 
-		/*
-		 * It is possible that by the time we acquire the lock on publication,
-		 * concurrent DDL has removed it. We can test this by checking the
-		 * existence of publication. We get the tuple again to avoid the risk
-		 * of any publication option getting changed.
-		 */
-		tup = SearchSysCacheCopy1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
-		if (!HeapTupleIsValid(tup))
-			ereport(ERROR,
-					errcode(ERRCODE_UNDEFINED_OBJECT),
-					errmsg("publication \"%s\" does not exist",
-						   stmt->pubname));
+				pubform = (Form_pg_publication) GETSTRUCT(tup);
+				pubid = pubform->oid;
+
+				if (!object_ownercheck(PublicationRelationId, pubid,
+									   GetUserId()))
+					aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_PUBLICATION,
+								   stmt->pubname);
+
+				if (retry)
+				{
+					if (pubid == oldPubId)
+						break;
+					UnlockDatabaseObject(PublicationRelationId, oldPubId, 0,
+										 AccessExclusiveLock);
+				}
+
+				LockDatabaseObject(PublicationRelationId, pubid, 0,
+								   AccessExclusiveLock);
+
+				if (inval_count == SharedInvalidMessageCounter)
+					break;
+
+				retry = true;
+				oldPubId = pubid;
+				heap_freetuple(tup);
+			}
+		}
+
+		ObjectsInPublicationToOids(stmt->pubobjects, pstate, &relations,
+								   &exceptrelations, &schemaidlist);
+
+		CheckAlterPublication(stmt, tup, relations, schemaidlist);
 
 		relations = list_concat(relations, exceptrelations);
 		AlterPublicationTables(stmt, tup, relations, pstate->p_sourcetext,
-- 
2.34.1

