From 94a1af3393ba4d87695b67572e2a1d9b02ddffc3 Mon Sep 17 00:00:00 2001
From: Mikhail Nikalayeu <mihailnikalayeu@gmail.com>
Date: Sun, 25 Jan 2026 18:01:38 +0100
Subject: [PATCH v2] Fix duplicate arbiter detection during REINDEX
 CONCURRENTLY on partitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 90eae926a fixed ON CONFLICT handling during REINDEX CONCURRENTLY
on partitioned tables by treating unparented indexes as potential
arbiters.  However, there's a remaining race condition: when pg_inherits
records are swapped between consecutive calls to get_partition_ancestors(),
two different child indexes can appear to have the same parent, causing
duplicate entries in the arbiter list and triggering "invalid arbiter
index list" errors.

Note that this is not a new problem introduced by 90eae926a.  The same
error could occur before that commit in a slightly different scenario:
an index is selected during planning, then index_concurrently_swap
commits, and a subsequent call to get_partition_ancestors() uses a new
catalog snapshot that sees zero ancestors for that index.

Fix by tracking which parent indexes have already been processed.  If a
subsequent call to get_partition_ancestors() returns a parent we've
already seen, treat that index as unparented instead, allowing it to be
matched via IsIndexCompatibleAsArbiter() like other concurrent reindex
scenarios.

Author: Mihail Nikalayeu <mihailnikalayeu@gmail.com>
Reported-by: Alexander Lakhin <exclusion@gmail.com>
Reviewed-by: Álvaro Herrera <alvherre@kurilemu.de>
Discussion: https://postgr.es/m/e5a8c1df-04e5-4343-85ef-5df2a7e3d90c@gmail.com
---
 src/backend/executor/execPartition.c          | 36 ++++++++++----
 .../t/010_index_concurrently_upsert.pl        | 47 +++++++++++++++++++
 2 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 55bdf5c4835..d4625d90ad6 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -28,6 +28,7 @@
 #include "partitioning/partprune.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/acl.h"
+#include "utils/injection_point.h"
 #include "utils/lsyscache.h"
 #include "utils/partcache.h"
 #include "utils/rls.h"
@@ -761,7 +762,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 		if (rootResultRelInfo->ri_onConflictArbiterIndexes != NIL)
 		{
 			List	   *unparented_idxs = NIL,
-					   *arbiters_listidxs = NIL;
+					   *arbiters_listidxs = NIL,
+					   *ancestors_seen = NIL;
 
 			for (int listidx = 0; listidx < leaf_part_rri->ri_NumIndices; listidx++)
 			{
@@ -775,13 +777,29 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 				 * in case REINDEX CONCURRENTLY is working on one of the
 				 * arbiters.
 				 *
-				 * XXX get_partition_ancestors is slow: it scans pg_inherits
-				 * each time.  Consider a syscache or some other way to cache?
+				 * However, if two indexes appear to have the same parent,
+				 * treat the second of these as if it had no parent.  This
+				 * sounds counterintuitive, but it can happen if a transaction
+				 * running REINDEX CONCURRENTLY commits right between those
+				 * two indexes are checked by another process in this loop.
+				 * This will have the effect of also treating that second
+				 * index as arbiter.
+				 *
+				 * XXX get_partition_ancestors scans pg_inherits, which is not
+				 * only slow, but also means the catalog snapshot can get
+				 * invalidated each time through the loop (cf.
+				 * GetNonHistoricCatalogSnapshot).  Consider a syscache or
+				 * some other way to cache?
 				 */
 				indexoid = RelationGetRelid(leaf_part_rri->ri_IndexRelationDescs[listidx]);
 				ancestors = get_partition_ancestors(indexoid);
-				if (ancestors != NIL)
+				INJECTION_POINT("exec-init-partition-after-get-partition-ancestors", NULL);
+
+				if (ancestors != NIL &&
+					!list_member_oid(ancestors_seen, linitial_oid(ancestors)))
 				{
+					ancestors_seen = lappend_oid(ancestors_seen, linitial_oid(ancestors));
+
 					foreach_oid(parent_idx, rootResultRelInfo->ri_onConflictArbiterIndexes)
 					{
 						if (list_member_oid(ancestors, parent_idx))
@@ -794,6 +812,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 				}
 				else
 					unparented_idxs = lappend_int(unparented_idxs, listidx);
+
 				list_free(ancestors);
 			}
 
@@ -812,16 +831,16 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 				foreach_int(unparented_i, unparented_idxs)
 				{
 					Relation	unparented_rel;
-					IndexInfo  *unparenred_ii;
+					IndexInfo  *unparented_ii;
 
 					unparented_rel = leaf_part_rri->ri_IndexRelationDescs[unparented_i];
-					unparenred_ii = leaf_part_rri->ri_IndexRelationInfo[unparented_i];
+					unparented_ii = leaf_part_rri->ri_IndexRelationInfo[unparented_i];
 
 					Assert(!list_member_oid(arbiterIndexes,
 											unparented_rel->rd_index->indexrelid));
 
 					/* Ignore indexes not ready */
-					if (!unparenred_ii->ii_ReadyForInserts)
+					if (!unparented_ii->ii_ReadyForInserts)
 						continue;
 
 					foreach_int(arbiter_i, arbiters_listidxs)
@@ -839,7 +858,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 						if (IsIndexCompatibleAsArbiter(arbiter_rel,
 													   arbiter_ii,
 													   unparented_rel,
-													   unparenred_ii))
+													   unparented_ii))
 						{
 							arbiterIndexes = lappend_oid(arbiterIndexes,
 														 unparented_rel->rd_index->indexrelid);
@@ -851,6 +870,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 			}
 			list_free(unparented_idxs);
 			list_free(arbiters_listidxs);
+			list_free(ancestors_seen);
 		}
 
 		/*
diff --git a/src/test/modules/test_misc/t/010_index_concurrently_upsert.pl b/src/test/modules/test_misc/t/010_index_concurrently_upsert.pl
index 26682ebc55a..69178b129c7 100644
--- a/src/test/modules/test_misc/t/010_index_concurrently_upsert.pl
+++ b/src/test/modules/test_misc/t/010_index_concurrently_upsert.pl
@@ -785,6 +785,53 @@ clean_safe_quit_ok($s1, $s2, $s3);
 
 $node->safe_psql('postgres', 'TRUNCATE TABLE test.tblexpr');
 
+############################################################################
+note('Test: REINDEX on partitioned table (invalid arbiter list)');
+
+# causes invalid ERROR:  invalid arbiter index list
+
+$s1 = $node->background_psql('postgres', on_error_stop => 0);
+$s2 = $node->background_psql('postgres', on_error_stop => 0);
+$s3 = $node->background_psql('postgres', on_error_stop => 0);
+
+$s1->query_safe(
+	q[
+SELECT injection_points_set_local();
+SELECT injection_points_attach('exec-init-partition-after-get-partition-ancestors', 'wait');
+]);
+
+$s2->query_safe(
+	q[
+SELECT injection_points_set_local();
+SELECT injection_points_attach('reindex-relation-concurrently-before-swap', 'wait');
+]);
+
+$s2->query_until(
+	qr/starting_reindex/, q[
+\echo starting_reindex
+REINDEX INDEX CONCURRENTLY test.tbl_partition_pkey;
+]);
+
+ok_injection_point($node, 'reindex-relation-concurrently-before-swap');
+
+$s1->query_until(
+	qr/starting_upsert_s1/, q[
+\echo starting_upsert_s1
+INSERT INTO test.tblparted VALUES (13, now()) ON CONFLICT (i) DO UPDATE SET updated_at = now();
+]);
+
+ok_injection_point($node,
+	'exec-init-partition-after-get-partition-ancestors');
+
+wakeup_injection_point($node, 'reindex-relation-concurrently-before-swap');
+
+wakeup_injection_point($node,
+	'exec-init-partition-after-get-partition-ancestors');
+
+clean_safe_quit_ok($s1, $s2, $s3);
+
+$node->safe_psql('postgres', 'TRUNCATE TABLE test.tblparted');
+
 done_testing();
 
 ############################################################################
-- 
2.47.3

