From b5197137540c7301bfe11e61cc02cb6e17d84b8a Mon Sep 17 00:00:00 2001 From: Mikhail Nikalayeu Date: Tue, 31 Dec 2024 14:09:52 +0100 Subject: [PATCH v26 1/8] This is https://commitfest.postgresql.org/50/5160/ and https://commitfest.postgresql.org/patch/5438/ merged into a single commit. It is required for the stability of the stress tests. --- contrib/amcheck/verify_nbtree.c | 68 ++++++------- src/backend/commands/indexcmds.c | 4 +- src/backend/executor/execIndexing.c | 3 + src/backend/executor/execPartition.c | 119 +++++++++++++++++++--- src/backend/executor/nodeModifyTable.c | 2 + src/backend/optimizer/util/plancat.c | 135 ++++++++++++++++++------- src/backend/utils/time/snapmgr.c | 2 + 7 files changed, 245 insertions(+), 88 deletions(-) diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 0949c88983a..2445f001700 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -382,7 +382,6 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, BTMetaPageData *metad; uint32 previouslevel; BtreeLevel current; - Snapshot snapshot = SnapshotAny; if (!readonly) elog(DEBUG1, "verifying consistency of tree structure for index \"%s\"", @@ -433,38 +432,35 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, state->heaptuplespresent = 0; /* - * Register our own snapshot in !readonly case, rather than asking + * Register our own snapshot for heapallindexed, rather than asking * table_index_build_scan() to do this for us later. This needs to * happen before index fingerprinting begins, so we can later be * certain that index fingerprinting should have reached all tuples * returned by table_index_build_scan(). */ - if (!state->readonly) - { - snapshot = RegisterSnapshot(GetTransactionSnapshot()); + state->snapshot = RegisterSnapshot(GetTransactionSnapshot()); - /* - * GetTransactionSnapshot() always acquires a new MVCC snapshot in - * READ COMMITTED mode. 
A new snapshot is guaranteed to have all - * the entries it requires in the index. - * - * We must defend against the possibility that an old xact - * snapshot was returned at higher isolation levels when that - * snapshot is not safe for index scans of the target index. This - * is possible when the snapshot sees tuples that are before the - * index's indcheckxmin horizon. Throwing an error here should be - * very rare. It doesn't seem worth using a secondary snapshot to - * avoid this. - */ - if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data), - snapshot->xmin)) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("index \"%s\" cannot be verified using transaction snapshot", - RelationGetRelationName(rel)))); - } - } + /* + * GetTransactionSnapshot() always acquires a new MVCC snapshot in + * READ COMMITTED mode. A new snapshot is guaranteed to have all + * the entries it requires in the index. + * + * We must defend against the possibility that an old xact + * snapshot was returned at higher isolation levels when that + * snapshot is not safe for index scans of the target index. This + * is possible when the snapshot sees tuples that are before the + * index's indcheckxmin horizon. Throwing an error here should be + * very rare. It doesn't seem worth using a secondary snapshot to + * avoid this. + */ + if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin && + !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data), + state->snapshot->xmin)) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("index \"%s\" cannot be verified using transaction snapshot", + RelationGetRelationName(rel)))); +} /* * We need a snapshot to check the uniqueness of the index. 
For better @@ -476,9 +472,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, state->indexinfo = BuildIndexInfo(state->rel); if (state->indexinfo->ii_Unique) { - if (snapshot != SnapshotAny) - state->snapshot = snapshot; - else + if (state->snapshot == InvalidSnapshot) state->snapshot = RegisterSnapshot(GetTransactionSnapshot()); } } @@ -555,13 +549,12 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, /* * Create our own scan for table_index_build_scan(), rather than * getting it to do so for us. This is required so that we can - * actually use the MVCC snapshot registered earlier in !readonly - * case. + * actually use the MVCC snapshot registered earlier. * * Note that table_index_build_scan() calls heap_endscan() for us. */ scan = table_beginscan_strat(state->heaprel, /* relation */ - snapshot, /* snapshot */ + state->snapshot, /* snapshot */ 0, /* number of keys */ NULL, /* scan key */ true, /* buffer access strategy OK */ @@ -569,7 +562,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, /* * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY - * behaves in !readonly case. + * behaves. * * It's okay that we don't actually use the same lock strength for the * heap relation as any other ii_Concurrent caller would in !readonly @@ -578,7 +571,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, * that needs to be sure that there was no concurrent recycling of * TIDs. 
*/ - indexinfo->ii_Concurrent = !state->readonly; + indexinfo->ii_Concurrent = true; /* * Don't wait for uncommitted tuple xact commit/abort when index is a @@ -602,14 +595,11 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, state->heaptuplespresent, RelationGetRelationName(heaprel), 100.0 * bloom_prop_bits_set(state->filter)))); - if (snapshot != SnapshotAny) - UnregisterSnapshot(snapshot); - bloom_free(state->filter); } /* Be tidy: */ - if (snapshot == SnapshotAny && state->snapshot != InvalidSnapshot) + if (state->snapshot != InvalidSnapshot) UnregisterSnapshot(state->snapshot); MemoryContextDelete(state->targetcontext); } diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 5712fac3697..974243c5c60 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1789,6 +1789,7 @@ DefineIndex(Oid tableId, * before the reference snap was taken, we have to wait out any * transactions that might have older snapshots. */ + INJECTION_POINT("define_index_before_set_valid", NULL); pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, PROGRESS_CREATEIDX_PHASE_WAIT_3); WaitForOlderSnapshots(limitXmin, true); @@ -4228,7 +4229,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein * the same time to make sure we only get constraint violations from the * indexes with the correct names. */ - + INJECTION_POINT("reindex_relation_concurrently_before_swap", NULL); StartTransactionCommand(); /* @@ -4307,6 +4308,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein * index_drop() for more details. 
*/ + INJECTION_POINT("reindex_relation_concurrently_before_set_dead", NULL); pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, PROGRESS_CREATEIDX_PHASE_WAIT_4); WaitForLockersMultiple(lockTags, AccessExclusiveLock, true); diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 401606f840a..df7e7bce86d 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -117,6 +117,7 @@ #include "utils/multirangetypes.h" #include "utils/rangetypes.h" #include "utils/snapmgr.h" +#include "utils/injection_point.h" /* waitMode argument to check_exclusion_or_unique_constraint() */ typedef enum @@ -942,6 +943,8 @@ retry: econtext->ecxt_scantuple = save_scantuple; ExecDropSingleTupleTableSlot(existing_slot); + if (!conflict) + INJECTION_POINT("check_exclusion_or_unique_constraint_no_conflict", NULL); return !conflict; } diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index aa12e9ad2ea..066686483f0 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -490,6 +490,48 @@ ExecFindPartition(ModifyTableState *mtstate, return rri; } +/* + * IsIndexCompatibleAsArbiter + * Checks whether indexRelation can be used as an INSERT ON CONFLICT arbiter + * together with arbiterIndexRelation: same uniqueness, no exclusion operators, + * same key columns, and equal() expressions and predicates (a one-way list_difference() would accept a partial index for a non-partial arbiter). + * + * Returns true if the indexes are compatible. + */ +static bool +IsIndexCompatibleAsArbiter(Relation arbiterIndexRelation, + IndexInfo *arbiterIndexInfo, + Relation indexRelation, + IndexInfo *indexInfo) +{ + int i; + + if (arbiterIndexInfo->ii_Unique != indexInfo->ii_Unique) + return false; + /* Exclusion constraints are not supported as additional arbiters. 
*/ + if (arbiterIndexInfo->ii_ExclusionOps != NULL || indexInfo->ii_ExclusionOps != NULL) + return false; + if (arbiterIndexRelation->rd_index->indnkeyatts != indexRelation->rd_index->indnkeyatts) + return false; + + for (i = 0; i < indexRelation->rd_index->indnkeyatts; i++) + { + int arbiterAttoNo = arbiterIndexRelation->rd_index->indkey.values[i]; + int attoNo = indexRelation->rd_index->indkey.values[i]; + if (arbiterAttoNo != attoNo) + return false; + } + + if (!equal(RelationGetIndexExpressions(arbiterIndexRelation), + RelationGetIndexExpressions(indexRelation))) + return false; + + if (!equal(RelationGetIndexPredicate(arbiterIndexRelation), + RelationGetIndexPredicate(indexRelation))) + return false; + return true; +} + /* * ExecInitPartitionInfo * Lock the partition and initialize ResultRelInfo. Also setup other @@ -701,6 +743,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, if (rootResultRelInfo->ri_onConflictArbiterIndexes != NIL) { List *childIdxs; + List *nonAncestorIdxs = NIL; + int i, j, additional_arbiters = 0; childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc); @@ -711,23 +755,74 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, ListCell *lc2; ancestors = get_partition_ancestors(childIdx); - foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes) + if (ancestors) { - if (list_member_oid(ancestors, lfirst_oid(lc2))) - arbiterIndexes = lappend_oid(arbiterIndexes, childIdx); + foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes) + { + if (list_member_oid(ancestors, lfirst_oid(lc2))) + arbiterIndexes = lappend_oid(arbiterIndexes, childIdx); + } } + else /* No ancestor was found for that index. Save it for rechecking later. */ + nonAncestorIdxs = lappend_oid(nonAncestorIdxs, childIdx); list_free(ancestors); } + + /* + * If any non-ancestor indexes are found, we need to compare them with other + * indexes of the relation that will be used as arbiters. 
This is necessary + * when a partitioned index is processed by REINDEX CONCURRENTLY. Both indexes + * must be considered as arbiters to ensure that all concurrent transactions + * use the same set of arbiters. + */ + if (nonAncestorIdxs) + { + for (i = 0; i < leaf_part_rri->ri_NumIndices; i++) + { + if (list_member_oid(nonAncestorIdxs, leaf_part_rri->ri_IndexRelationDescs[i]->rd_index->indexrelid)) + { + Relation nonAncestorIndexRelation = leaf_part_rri->ri_IndexRelationDescs[i]; + IndexInfo *nonAncestorIndexInfo = leaf_part_rri->ri_IndexRelationInfo[i]; + Assert(!list_member_oid(arbiterIndexes, nonAncestorIndexRelation->rd_index->indexrelid)); + + /* It is too early to use non-ready indexes as arbiters */ + if (!nonAncestorIndexInfo->ii_ReadyForInserts) + continue; + + for (j = 0; j < leaf_part_rri->ri_NumIndices; j++) + { + if (list_member_oid(arbiterIndexes, + leaf_part_rri->ri_IndexRelationDescs[j]->rd_index->indexrelid)) + { + Relation arbiterIndexRelation = leaf_part_rri->ri_IndexRelationDescs[j]; + IndexInfo *arbiterIndexInfo = leaf_part_rri->ri_IndexRelationInfo[j]; + + /* If the non-ancestor index is compatible with an arbiter, use it as an arbiter too. Stop at the first match: once appended it is itself a member of arbiterIndexes and would be matched (and counted) again. */ + if (IsIndexCompatibleAsArbiter(arbiterIndexRelation, arbiterIndexInfo, + nonAncestorIndexRelation, nonAncestorIndexInfo)) + { + arbiterIndexes = lappend_oid(arbiterIndexes, nonAncestorIndexRelation->rd_index->indexrelid); + additional_arbiters++; + break; + } + } + } + } + } + } + list_free(nonAncestorIdxs); + + /* + * If the resulting lists are of inequal length, something is wrong. + * (This shouldn't happen, since arbiter index selection should not + * pick up a non-ready index.) + * + * The additional (non-ancestor) arbiters added above are excluded from the comparison. + */ + if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) != + list_length(arbiterIndexes) - additional_arbiters) + elog(ERROR, "invalid arbiter index list"); } - - /* - * If the resulting lists are of inequal length, something is wrong. 
- * (This shouldn't happen, since arbiter index selection should not - * pick up an invalid index.) - */ - if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) != - list_length(arbiterIndexes)) - elog(ERROR, "invalid arbiter index list"); leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes; /* diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 4c5647ac38a..f6d2a6ede93 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -70,6 +70,7 @@ #include "utils/datum.h" #include "utils/rel.h" #include "utils/snapmgr.h" +#include "utils/injection_point.h" typedef struct MTTargetRelLookup @@ -1179,6 +1180,7 @@ ExecInsert(ModifyTableContext *context, return NULL; } } + INJECTION_POINT("exec_insert_before_insert_speculative", NULL); /* * Before we start insertion proper, acquire our "speculative diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index d950bd93002..ff416f0522c 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -808,12 +808,14 @@ infer_arbiter_indexes(PlannerInfo *root) List *indexList; ListCell *l; - /* Normalized inference attributes and inference expressions: */ - Bitmapset *inferAttrs = NULL; - List *inferElems = NIL; + /* Normalized required attributes and expressions: */ + Bitmapset *requiredArbiterAttrs = NULL; + List *requiredArbiterElems = NIL; + List *requiredIndexPredExprs = (List *) onconflict->arbiterWhere; /* Results */ List *results = NIL; + bool foundValid = false; /* * Quickly return NIL for ON CONFLICT DO NOTHING without an inference @@ -848,8 +850,8 @@ infer_arbiter_indexes(PlannerInfo *root) if (!IsA(elem->expr, Var)) { - /* If not a plain Var, just shove it in inferElems for now */ - inferElems = lappend(inferElems, elem->expr); + /* If not a plain Var, just shove it in requiredArbiterElems for now */ + requiredArbiterElems = 
lappend(requiredArbiterElems, elem->expr); continue; } @@ -861,30 +863,76 @@ infer_arbiter_indexes(PlannerInfo *root) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("whole row unique index inference specifications are not supported"))); - inferAttrs = bms_add_member(inferAttrs, + requiredArbiterAttrs = bms_add_member(requiredArbiterAttrs, attno - FirstLowInvalidHeapAttributeNumber); } + indexList = RelationGetIndexList(relation); + /* * Lookup named constraint's index. This is not immediately returned - * because some additional sanity checks are required. + * because some additional sanity checks are required. Additionally, we + * need to process other indexes as potential arbiters to account for + * cases where REINDEX CONCURRENTLY is processing an index used as a + * named constraint. */ if (onconflict->constraint != InvalidOid) { indexOidFromConstraint = get_constraint_index(onconflict->constraint); if (indexOidFromConstraint == InvalidOid) + { ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("constraint in ON CONFLICT clause has no associated index"))); + errmsg("constraint in ON CONFLICT clause has no associated index"))); + } + + /* + * Find the named constraint's index to extract its attributes, expressions and predicate. + * Indexes are opened in indexList order, as in the main loop below, so the order of lock acquisition does not change (avoiding deadlocks). + */ + foreach(l, indexList) + { + Oid indexoid = lfirst_oid(l); + Relation idxRel; + Form_pg_index idxForm; + AttrNumber natt; + + idxRel = index_open(indexoid, rte->rellockmode); + idxForm = idxRel->rd_index; + + if (idxForm->indisready) + { + if (indexOidFromConstraint == idxForm->indexrelid) + { + /* + * Record the key attributes, expressions and predicate that other indexes must match to be used as arbiters together + * with indexOidFromConstraint. Both equivalent indexes must be involved + * in case of REINDEX CONCURRENTLY. NOTE(review): the lists fetched below are not renumbered, while candidates are renumbered with ChangeVarNodes() when varno != 1 -- confirm they still compare equal in that case. 
+ */ + for (natt = 0; natt < idxForm->indnkeyatts; natt++) + { + int attno = idxRel->rd_index->indkey.values[natt]; + + if (attno != 0) + requiredArbiterAttrs = bms_add_member(requiredArbiterAttrs, + attno - FirstLowInvalidHeapAttributeNumber); + } + requiredArbiterElems = RelationGetIndexExpressions(idxRel); + requiredIndexPredExprs = RelationGetIndexPredicate(idxRel); + /* We are done, so quit the loop. */ + index_close(idxRel, NoLock); + break; + } + } + index_close(idxRel, NoLock); + } } /* * Using that representation, iterate through the list of indexes on the * target relation to try and find a match */ - indexList = RelationGetIndexList(relation); - foreach(l, indexList) { Oid indexoid = lfirst_oid(l); @@ -907,7 +955,13 @@ infer_arbiter_indexes(PlannerInfo *root) idxRel = index_open(indexoid, rte->rellockmode); idxForm = idxRel->rd_index; - if (!idxForm->indisvalid) + /* + * We need to consider indexes that are indisready but not yet indisvalid, because + * they may become indisvalid before the execution phase. This is required + * to keep the set of indexes used as arbiters the same for all + * concurrent transactions. + */ + if (!idxForm->indisready) goto next; /* @@ -927,27 +981,23 @@ infer_arbiter_indexes(PlannerInfo *root) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("ON CONFLICT DO UPDATE not supported with exclusion constraints"))); - - results = lappend_oid(results, idxForm->indexrelid); - list_free(indexList); - index_close(idxRel, NoLock); - table_close(relation, NoLock); - return results; + goto found; } else if (indexOidFromConstraint != InvalidOid) { - /* No point in further work for index in named constraint case */ - goto next; + /* In the case of "ON CONFLICT ON CONSTRAINT constraint_name DO UPDATE" we need to skip non-unique candidates. 
*/ + if (!idxForm->indisunique && onconflict->action == ONCONFLICT_UPDATE) + goto next; + } else { + /* + * Only considering conventional inference at this point (not named + * constraints), so index under consideration can be immediately + * skipped if it's not unique + */ + if (!idxForm->indisunique) + goto next; } - /* - * Only considering conventional inference at this point (not named - * constraints), so index under consideration can be immediately - * skipped if it's not unique - */ - if (!idxForm->indisunique) - goto next; - /* * So-called unique constraints with WITHOUT OVERLAPS are really * exclusion constraints, so skip those too. @@ -967,7 +1017,7 @@ infer_arbiter_indexes(PlannerInfo *root) } /* Non-expression attributes (if any) must match */ - if (!bms_equal(indexedAttrs, inferAttrs)) + if (!bms_equal(indexedAttrs, requiredArbiterAttrs)) goto next; /* Expression attributes (if any) must match */ @@ -975,6 +1025,10 @@ infer_arbiter_indexes(PlannerInfo *root) if (idxExprs && varno != 1) ChangeVarNodes((Node *) idxExprs, 1, varno, 0); + /* + * If arbiterElems are present, check them. If a named constraint is + * present, arbiterElems == NIL. + */ foreach(el, onconflict->arbiterElems) { InferenceElem *elem = (InferenceElem *) lfirst(el); @@ -1012,27 +1066,35 @@ infer_arbiter_indexes(PlannerInfo *root) } /* - * Now that all inference elements were matched, ensure that the + * In the case of conventional inference, ensure that the * expression elements from inference clause are not missing any * cataloged expressions. This does the right thing when unique * indexes redundantly repeat the same attribute, or if attributes * redundantly appear multiple times within an inference clause. + * + * In the case of a named constraint, ensure the candidate has no + * expression that is missing from the named constraint's index. 
*/ - if (list_difference(idxExprs, inferElems) != NIL) + if (list_difference(idxExprs, requiredArbiterElems) != NIL) goto next; - /* - * If it's a partial index, its predicate must be implied by the ON - * CONFLICT's WHERE clause. - */ predExprs = RelationGetIndexPredicate(idxRel); if (predExprs && varno != 1) ChangeVarNodes((Node *) predExprs, 1, varno, 0); - if (!predicate_implied_by(predExprs, (List *) onconflict->arbiterWhere, false)) + /* + * If it's a partial index and conventional inference, its predicate must be implied + * by the ON CONFLICT's WHERE clause. + */ + if (indexOidFromConstraint == InvalidOid && !predicate_implied_by(predExprs, requiredIndexPredExprs, false)) + goto next; + /* If it's a partial index and named constraint predicates must be equal. */ + if (indexOidFromConstraint != InvalidOid && list_difference(predExprs, requiredIndexPredExprs) != NIL) goto next; +found: results = lappend_oid(results, idxForm->indexrelid); + foundValid |= idxForm->indisvalid; next: index_close(idxRel, NoLock); } @@ -1040,7 +1102,8 @@ next: list_free(indexList); table_close(relation, NoLock); - if (results == NIL) + /* It is required to have at least one indisvalid index during the planning. */ + if (results == NIL || !foundValid) ereport(ERROR, (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), errmsg("there is no unique or exclusion constraint matching the ON CONFLICT specification"))); diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 65561cc6bc3..8e1a918f130 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -123,6 +123,7 @@ #include "utils/resowner.h" #include "utils/snapmgr.h" #include "utils/syscache.h" +#include "utils/injection_point.h" /* @@ -458,6 +459,7 @@ InvalidateCatalogSnapshot(void) pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node); CatalogSnapshot = NULL; SnapshotResetXmin(); + INJECTION_POINT("invalidate_catalog_snapshot_end", NULL); } } -- 2.43.0