From e9600073108c9fbfe64087932f4bb2ea12f58418 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 29 Jan 2019 21:45:29 +0100 Subject: [PATCH v7] REINDEX CONCURRENTLY --- doc/src/sgml/mvcc.sgml | 1 + doc/src/sgml/ref/reindex.sgml | 184 +++- src/backend/catalog/index.c | 547 ++++++++++- src/backend/catalog/pg_depend.c | 143 +++ src/backend/catalog/toasting.c | 2 +- src/backend/commands/indexcmds.c | 882 +++++++++++++++--- src/backend/commands/tablecmds.c | 32 +- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/equalfuncs.c | 1 + src/backend/parser/gram.y | 22 +- src/backend/tcop/utility.c | 10 +- src/bin/psql/common.c | 16 + src/bin/psql/tab-complete.c | 18 +- src/include/catalog/dependency.h | 5 + src/include/catalog/index.h | 17 + src/include/commands/defrem.h | 6 +- src/include/nodes/parsenodes.h | 1 + .../expected/reindex-concurrently.out | 78 ++ src/test/isolation/isolation_schedule | 1 + .../isolation/specs/reindex-concurrently.spec | 40 + src/test/regress/expected/create_index.out | 95 ++ src/test/regress/sql/create_index.sql | 61 ++ 22 files changed, 1989 insertions(+), 174 deletions(-) create mode 100644 src/test/isolation/expected/reindex-concurrently.out create mode 100644 src/test/isolation/specs/reindex-concurrently.spec diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml index bedd9a008d..9b7ef8bf09 100644 --- a/doc/src/sgml/mvcc.sgml +++ b/doc/src/sgml/mvcc.sgml @@ -926,6 +926,7 @@ Table-level Lock Modes Acquired by VACUUM (without ), ANALYZE, CREATE INDEX CONCURRENTLY, + REINDEX CONCURRENTLY, CREATE STATISTICS, and certain ALTER INDEX and ALTER TABLE variants (for full details see and -REINDEX [ ( VERBOSE ) ] { INDEX | TABLE | SCHEMA | DATABASE | SYSTEM } name +REINDEX [ ( VERBOSE ) ] { INDEX | TABLE | SCHEMA | DATABASE | SYSTEM } [ CONCURRENTLY ] name @@ -67,10 +67,7 @@ Description An index build with the CONCURRENTLY option failed, leaving an invalid index. 
Such indexes are useless but it can be - convenient to use REINDEX to rebuild them. Note that - REINDEX will not perform a concurrent build. To build the - index without interfering with production you should drop the index and - reissue the CREATE INDEX CONCURRENTLY command. + convenient to use REINDEX to rebuild them. @@ -151,6 +148,21 @@ Parameters + + CONCURRENTLY + + + When this option is used, PostgreSQL will rebuild the + index without taking any locks that prevent concurrent inserts, + updates, or deletes on the table; whereas a standard reindex build + locks out writes (but not reads) on the table until it's done. + There are several caveats to be aware of when using this option + — see Rebuilding Indexes Concurrently. + + + + VERBOSE @@ -241,6 +253,160 @@ Notes Each individual partition can be reindexed separately instead. + + Rebuilding Indexes Concurrently + + + index + rebuilding concurrently + + + + Rebuilding an index can interfere with regular operation of a database. + Normally PostgreSQL locks the table whose index is rebuilt + against writes and performs the entire index build with a single scan of the + table. Other transactions can still read the table, but if they try to + insert, update, or delete rows in the table they will block until the + index rebuild is finished. This could have a severe effect if the system is + a live production database. Very large tables can take many hours to be + indexed, and even for smaller tables, an index rebuild can lock out writers + for periods that are unacceptably long for a production system. + + + + PostgreSQL supports rebuilding indexes with minimum locking + of writes. This method is invoked by specifying the + CONCURRENTLY option of REINDEX. When this option + is used, PostgreSQL must perform two scans of the table + for each index that needs to be rebuilt and in addition it must wait for + all existing transactions that could potentially use the index to + terminate. 
This method requires more total work than a standard index + rebuild and takes significantly longer to complete as it needs to wait + for unfinished transactions that might modify the index. However, since + it allows normal operations to continue while the index is rebuilt, this + method is useful for rebuilding indexes in a production environment. Of + course, the extra CPU, memory and I/O load imposed by the index rebuild + may slow down other operations. + + + + The following steps occur in a concurrent index rebuild, each in a separate + transaction except when the new index definitions are created, where all + the concurrent entries are created using only one transaction. Note that + if there are multiple indexes to be rebuilt then each step loops through + all the indexes we're rebuilding, using a separate transaction for each one. + REINDEX CONCURRENTLY proceeds as follows when rebuilding + indexes: + + + + + A new temporary index definition is added into the catalog + pg_index. This definition will be used to replace the + old index. This step is done as a single transaction for all the indexes + involved in this process, meaning that if + REINDEX CONCURRENTLY is run on a table with multiple + indexes, all the catalog entries of the new indexes are created within a + single transaction. A SHARE UPDATE EXCLUSIVE lock at + session level is taken on the indexes being reindexed as well as on their + parent tables to prevent any schema modification while processing. + + + + + A first pass to build the index is done for each new index entry. + Once the index is built, its flag pg_index.indisready is + switched to true to make it ready for inserts, making it + visible to other sessions once the transaction that performed the + build is finished. This step is done within a single transaction + for each entry. + + + + + Then a second pass is performed to add tuples that were added while + the first pass build was running. 
This step is performed within a single + transaction for each temporary entry. + + + + + All the constraints and foreign keys which refer to the index are swapped + to refer to the new index definition, and the names of the indexes are + changed. At this point pg_index.indisvalid is switched to + true for the new index and to false for the old, and + a cache invalidation is done so that all the sessions that referenced the + old index are invalidated. This step is done within a single transaction + for each temporary entry. + + + + + Old indexes have pg_index.indisready switched to false + to prevent any new tuple insertions after waiting for running queries which + may reference the old index to complete. This step is done within a single + transaction for each temporary entry. + + + + + The old index definition and its data are dropped. This step is done within + a single transaction for each temporary entry. + + + + + The SHARE UPDATE EXCLUSIVE session lock is released + for all the indexes processed as well as their parent tables. + + + + + + + If a problem arises while rebuilding the indexes, such as a + uniqueness violation in a unique index, the REINDEX + command will fail but leave behind an invalid new index on top + of the existing one. This index will be ignored for querying purposes + because it might be incomplete; however it will still consume update + overhead. The psql \d command will report + such an index as INVALID: + + +postgres=# \d tab + Table "public.tab" + Column | Type | Modifiers +--------+---------+----------- + col | integer | +Indexes: + "idx" btree (col) + "idx_ccnew" btree (col) INVALID + + + The recommended recovery method in such cases is to drop the invalid + index and try again to perform REINDEX CONCURRENTLY. + The concurrent index created during the processing has a name ending in + the suffix ccnew, or ccold if it is an old index definition which we failed + to drop. 
Invalid indexes can be dropped using DROP INDEX, + including invalid toast indexes. + + + + Regular index builds permit other regular index builds on the same + table to occur in parallel, but only one concurrent index build can + occur on a table at a time. In both cases, no other types of schema + modification on the table are allowed meanwhile. Another difference + is that a regular REINDEX TABLE or REINDEX INDEX + command can be performed within a transaction block, but + REINDEX CONCURRENTLY cannot. + + + + REINDEX SYSTEM does not support + CONCURRENTLY since system catalogs cannot be reindexed + concurrently. + + @@ -272,6 +438,14 @@ Examples ... broken_db=> REINDEX DATABASE broken_db; broken_db=> \q + + + + Rebuild a table without blocking read and write operations on the involved + relations while the rebuild is in progress: + + +REINDEX TABLE CONCURRENTLY my_broken_table; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 225c078018..be857237e7 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -41,6 +41,7 @@ #include "catalog/pg_am.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" +#include "catalog/pg_description.h" #include "catalog/pg_depend.h" #include "catalog/pg_inherits.h" #include "catalog/pg_operator.h" @@ -693,6 +694,7 @@ UpdateIndexRelation(Oid indexoid, * classObjectId: array of index opclass OIDs, one per index column * coloptions: array of per-index-column indoption settings * reloptions: AM-specific options + * tupdesc: Tuple descriptor used for the index if defined * flags: bitmask that can include any combination of these bits: * INDEX_CREATE_IS_PRIMARY * the index is a primary key @@ -733,6 +735,7 @@ index_create(Relation heapRelation, Oid *classObjectId, int16 *coloptions, Datum reloptions, + TupleDesc tupdesc, bits16 flags, bits16 constr_flags, bool allow_system_table_mods, @@ -795,7 +798,7 @@ index_create(Relation heapRelation, * release locks before committing in catalogs */ if 
(concurrent && - IsSystemRelation(heapRelation)) + IsSystemNamespace(get_rel_namespace(heapRelationId))) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("concurrent index creation on system catalog tables is not supported"))); @@ -863,14 +866,20 @@ index_create(Relation heapRelation, } /* - * construct tuple descriptor for index tuples + * construct tuple descriptor for index tuples if not passed by caller */ - indexTupDesc = ConstructTupleDescriptor(heapRelation, - indexInfo, - indexColNames, - accessMethodObjectId, - collationObjectId, - classObjectId); + if (!tupdesc) + indexTupDesc = ConstructTupleDescriptor(heapRelation, + indexInfo, + indexColNames, + accessMethodObjectId, + collationObjectId, + classObjectId); + else + { + Assert(indexColNames == NIL); + indexTupDesc = tupdesc; + } /* * Allocate an OID for the index, unless we were told what to use. @@ -1201,6 +1210,496 @@ index_create(Relation heapRelation, return indexRelationId; } +/* + * index_concurrently_create_copy + * + * Create concurrently an index based on the definition of the one provided by + * caller. The index is inserted into catalogs and needs to be built later + * on. This is called during concurrent reindex processing. 
+ */ +Oid +index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId, const char *newName) +{ + Relation indexRelation; + IndexInfo *indexInfo; + Oid newIndexId = InvalidOid; + HeapTuple indexTuple, + classTuple; + Datum indclassDatum, + colOptionDatum, + optionDatum; + TupleDesc indexTupDesc; + oidvector *indclass; + int2vector *indcoloptions; + bool isnull; + + indexRelation = index_open(oldIndexId, RowExclusiveLock); + + /* New index uses the same index information as old index */ + indexInfo = BuildIndexInfo(indexRelation); + + /* Do not copy exclusion constraint */ + indexInfo->ii_ExclusionOps = NULL; + indexInfo->ii_ExclusionProcs = NULL; + indexInfo->ii_ExclusionStrats = NULL; + + /* Create a copy of the tuple descriptor to be used for the new entry */ + indexTupDesc = CreateTupleDescCopyConstr(RelationGetDescr(indexRelation)); + + /* Get the array of class and column options IDs from index info */ + indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldIndexId)); + if (!HeapTupleIsValid(indexTuple)) + elog(ERROR, "cache lookup failed for index %u", oldIndexId); + indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple, + Anum_pg_index_indclass, &isnull); + Assert(!isnull); + indclass = (oidvector *) DatumGetPointer(indclassDatum); + + colOptionDatum = SysCacheGetAttr(INDEXRELID, indexTuple, + Anum_pg_index_indoption, &isnull); + Assert(!isnull); + indcoloptions = (int2vector *) DatumGetPointer(colOptionDatum); + + /* Fetch options of index if any */ + classTuple = SearchSysCache1(RELOID, oldIndexId); + if (!HeapTupleIsValid(classTuple)) + elog(ERROR, "cache lookup failed for relation %u", oldIndexId); + optionDatum = SysCacheGetAttr(RELOID, classTuple, + Anum_pg_class_reloptions, &isnull); + + /* Now create the new index */ + newIndexId = index_create(heapRelation, + newName, + InvalidOid, /* indexRelationId */ + InvalidOid, /* parentIndexRelid */ + InvalidOid, /* parentConstraintId */ + InvalidOid, /* relFileNode */ + indexInfo, + NIL, + 
indexRelation->rd_rel->relam, + indexRelation->rd_rel->reltablespace, + indexRelation->rd_indcollation, + indclass->values, + indcoloptions->values, + optionDatum, + indexTupDesc, + INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT, + 0, + true, /* allow table to be a system catalog? */ + false, /* is_internal? */ + NULL); + + /* Close the relations used and clean up */ + index_close(indexRelation, NoLock); + ReleaseSysCache(indexTuple); + ReleaseSysCache(classTuple); + + return newIndexId; +} + +/* + * index_concurrently_build + * + * Build index for a concurrent operation. Low-level locks are taken when + * this operation is performed to prevent only schema changes, but they need to + * be kept until the end of the transaction performing this operation. + */ +void +index_concurrently_build(Oid heapOid, + Oid indexOid) +{ + Relation heapRel, + indexRelation; + IndexInfo *indexInfo; + + /* Open and lock the parent heap relation */ + heapRel = table_open(heapOid, ShareUpdateExclusiveLock); + + /* And the target index relation */ + indexRelation = index_open(indexOid, RowExclusiveLock); + + /* + * We have to re-build the IndexInfo struct, since it was lost in the + * commit of the transaction where this concurrent index was created at + * the catalog level. + */ + indexInfo = BuildIndexInfo(indexRelation); + Assert(!indexInfo->ii_ReadyForInserts); + indexInfo->ii_Concurrent = true; + indexInfo->ii_BrokenHotChain = false; + + /* Now build the index */ + index_build(heapRel, indexRelation, indexInfo, false, true); + + /* Close both relations, and keep the locks */ + table_close(heapRel, NoLock); + index_close(indexRelation, NoLock); + + /* + * Update the pg_index row to mark the index as ready for inserts. Once + * we commit this transaction, any new transactions that open the table + * must insert new entries into the index for insertions and non-HOT + * updates. 
+ */ + index_set_state_flags(indexOid, INDEX_CREATE_SET_READY); +} + +/* + * index_concurrently_swap + * + * Swap name, dependencies, and constraints of the old index over to the new + * index, while marking the old index as invalid and the new as valid. + */ +void +index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName) +{ + Relation pg_class, + pg_index, + pg_constraint, + pg_trigger; + Relation oldClassRel, + newClassRel; + HeapTuple oldClassTuple, + newClassTuple; + Form_pg_class oldClassForm, + newClassForm; + HeapTuple oldIndexTuple, + newIndexTuple; + Form_pg_index oldIndexForm, + newIndexForm; + Oid indexConstraintOid; + List *constraintOids = NIL; + ListCell *lc; + + /* + * Take a necessary lock on the old and new index before swapping them. + */ + oldClassRel = relation_open(oldIndexId, ShareUpdateExclusiveLock); + newClassRel = relation_open(newIndexId, ShareUpdateExclusiveLock); + + /* Now swap names and dependencies of those indexes */ + pg_class = table_open(RelationRelationId, RowExclusiveLock); + + oldClassTuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(oldIndexId)); + if (!HeapTupleIsValid(oldClassTuple)) + elog(ERROR, "could not find tuple for relation %u", oldIndexId); + newClassTuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(newIndexId)); + if (!HeapTupleIsValid(newClassTuple)) + elog(ERROR, "could not find tuple for relation %u", newIndexId); + + oldClassForm = (Form_pg_class) GETSTRUCT(oldClassTuple); + newClassForm = (Form_pg_class) GETSTRUCT(newClassTuple); + + /* Swap the names */ + namestrcpy(&newClassForm->relname, NameStr(oldClassForm->relname)); + namestrcpy(&oldClassForm->relname, oldName); + + CatalogTupleUpdate(pg_class, &oldClassTuple->t_self, oldClassTuple); + CatalogTupleUpdate(pg_class, &newClassTuple->t_self, newClassTuple); + + heap_freetuple(oldClassTuple); + heap_freetuple(newClassTuple); + + /* Now swap index info */ + pg_index = table_open(IndexRelationId, RowExclusiveLock); + + 
oldIndexTuple = SearchSysCacheCopy1(INDEXRELID, + ObjectIdGetDatum(oldIndexId)); + if (!HeapTupleIsValid(oldIndexTuple)) + elog(ERROR, "could not find tuple for relation %u", oldIndexId); + newIndexTuple = SearchSysCacheCopy1(INDEXRELID, + ObjectIdGetDatum(newIndexId)); + if (!HeapTupleIsValid(newIndexTuple)) + elog(ERROR, "could not find tuple for relation %u", newIndexId); + + oldIndexForm = (Form_pg_index) GETSTRUCT(oldIndexTuple); + newIndexForm = (Form_pg_index) GETSTRUCT(newIndexTuple); + + /* + * Copy constraint flags from the old index. This is safe because the old + * index guaranteed uniqueness. + */ + newIndexForm->indisprimary = oldIndexForm->indisprimary; + oldIndexForm->indisprimary = false; + newIndexForm->indisexclusion = oldIndexForm->indisexclusion; + oldIndexForm->indisexclusion = false; + newIndexForm->indimmediate = oldIndexForm->indimmediate; + oldIndexForm->indimmediate = true; + + /* Mark new index as valid and old index as invalid */ + newIndexForm->indisvalid = true; + oldIndexForm->indisvalid = false; + oldIndexForm->indisclustered = false; + + CatalogTupleUpdate(pg_index, &oldIndexTuple->t_self, oldIndexTuple); + CatalogTupleUpdate(pg_index, &newIndexTuple->t_self, newIndexTuple); + + heap_freetuple(oldIndexTuple); + heap_freetuple(newIndexTuple); + + /* + * Move constraints and triggers over to the new index + */ + + constraintOids = get_index_ref_constraints(oldIndexId); + + indexConstraintOid = get_index_constraint(oldIndexId); + + if (OidIsValid(indexConstraintOid)) + constraintOids = lappend_oid(constraintOids, indexConstraintOid); + + pg_constraint = table_open(ConstraintRelationId, RowExclusiveLock); + pg_trigger = table_open(TriggerRelationId, RowExclusiveLock); + + foreach(lc, constraintOids) + { + HeapTuple constraintTuple, + triggerTuple; + Form_pg_constraint conForm; + ScanKeyData key[1]; + SysScanDesc scan; + Oid constraintOid = lfirst_oid(lc); + + /* Move the constraint from the old to the new index */ + 
constraintTuple = SearchSysCacheCopy1(CONSTROID, + ObjectIdGetDatum(constraintOid)); + if (!HeapTupleIsValid(constraintTuple)) + elog(ERROR, "could not find tuple for constraint %u", constraintOid); + + conForm = ((Form_pg_constraint) GETSTRUCT(constraintTuple)); + + if (conForm->conindid == oldIndexId) + { + conForm->conindid = newIndexId; + + CatalogTupleUpdate(pg_constraint, &constraintTuple->t_self, constraintTuple); + } + + heap_freetuple(constraintTuple); + + /* Search for trigger records */ + ScanKeyInit(&key[0], + Anum_pg_trigger_tgconstraint, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(constraintOid)); + + scan = systable_beginscan(pg_trigger, TriggerConstraintIndexId, true, + NULL, 1, key); + + while (HeapTupleIsValid((triggerTuple = systable_getnext(scan)))) + { + Form_pg_trigger tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple); + + if (tgForm->tgconstrindid != oldIndexId) + continue; + + /* Make a modifiable copy */ + triggerTuple = heap_copytuple(triggerTuple); + tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple); + + tgForm->tgconstrindid = newIndexId; + + CatalogTupleUpdate(pg_trigger, &triggerTuple->t_self, triggerTuple); + + heap_freetuple(triggerTuple); + } + + systable_endscan(scan); + } + + /* + * Move comment if any + */ + { + Relation description; + ScanKeyData skey[3]; + SysScanDesc sd; + HeapTuple tuple; + Datum values[Natts_pg_description] = {0}; + bool nulls[Natts_pg_description] = {0}; + bool replaces[Natts_pg_description] = {0}; + + values[Anum_pg_description_objoid - 1] = ObjectIdGetDatum(newIndexId); + replaces[Anum_pg_description_objoid - 1] = true; + + ScanKeyInit(&skey[0], + Anum_pg_description_objoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(oldIndexId)); + ScanKeyInit(&skey[1], + Anum_pg_description_classoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationRelationId)); + ScanKeyInit(&skey[2], + Anum_pg_description_objsubid, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(0)); + + description 
= table_open(DescriptionRelationId, RowExclusiveLock); + + sd = systable_beginscan(description, DescriptionObjIndexId, true, + NULL, 3, skey); + + while ((tuple = systable_getnext(sd)) != NULL) + { + tuple = heap_modify_tuple(tuple, RelationGetDescr(description), + values, nulls, replaces); + CatalogTupleUpdate(description, &tuple->t_self, tuple); + + break; /* Assume there can be only one match */ + } + + systable_endscan(sd); + table_close(description, NoLock); + } + + /* + * Move all dependencies on the old index to the new index + */ + + if (OidIsValid(indexConstraintOid)) + { + ObjectAddress myself, + referenced; + + /* Change to having the new index depend on the constraint */ + deleteDependencyRecordsForClass(RelationRelationId, oldIndexId, + ConstraintRelationId, DEPENDENCY_INTERNAL); + + myself.classId = RelationRelationId; + myself.objectId = newIndexId; + myself.objectSubId = 0; + + referenced.classId = ConstraintRelationId; + referenced.objectId = indexConstraintOid; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL); + } + + changeDependenciesOn(RelationRelationId, oldIndexId, newIndexId); + + /* Close relations */ + table_close(pg_class, RowExclusiveLock); + table_close(pg_index, RowExclusiveLock); + table_close(pg_constraint, RowExclusiveLock); + table_close(pg_trigger, RowExclusiveLock); + + /* The locks taken on both indexes are not released until the end of transaction */ + relation_close(oldClassRel, NoLock); + relation_close(newClassRel, NoLock); +} + +/* + * index_concurrently_set_dead + * + * Perform the last invalidation stage of DROP INDEX CONCURRENTLY or REINDEX + * CONCURRENTLY before actually dropping the index. After calling this + * function the index is seen by all the backends as dead. Low-level locks + * taken here are kept until the end of the transaction calling this + * function. 
+ */ +void +index_concurrently_set_dead(Oid heapOid, Oid indexOid) +{ + Relation heapRelation, + indexRelation; + + /* + * No more predicate locks will be acquired on this index, and we're about + * to stop doing inserts into the index which could show conflicts with + * existing predicate locks, so now is the time to move them to the heap + * relation. + */ + heapRelation = table_open(heapOid, ShareUpdateExclusiveLock); + indexRelation = index_open(indexOid, ShareUpdateExclusiveLock); + TransferPredicateLocksToHeapRelation(indexRelation); + + /* + * Now we are sure that nobody uses the index for queries; they just might + * have it open for updating it. So now we can unset indisready and + * indislive, then wait till nobody could be using it at all anymore. + */ + index_set_state_flags(indexOid, INDEX_DROP_SET_DEAD); + + /* + * Invalidate the relcache for the table, so that after this commit all + * sessions will refresh the table's index list. Forgetting just the + * index's relcache entry is not enough. + */ + CacheInvalidateRelcache(heapRelation); + + /* + * Close the relations again, though still holding session lock. + */ + table_close(heapRelation, NoLock); + index_close(indexRelation, NoLock); +} + +/* + * index_concurrently_drop + * + * Drop a single index concurrently as the last step of an index concurrent + * process. Deletion is done through performDeletion or dependencies of the + * index would not get dropped. At this point all the indexes are already + * considered as invalid and dead so they can be dropped without using any + * concurrent options as it is certain that they will not interact with other + * server sessions. + */ +void +index_concurrently_drop(Oid indexId) +{ + Oid constraintOid = get_index_constraint(indexId); + ObjectAddress object; + Form_pg_index indexForm; + Relation pg_index; + HeapTuple indexTuple; + + /* + * Check that the index dropped here is not alive, it might be used by + * other backends in this case. 
+ */ + pg_index = table_open(IndexRelationId, RowExclusiveLock); + + indexTuple = SearchSysCacheCopy1(INDEXRELID, + ObjectIdGetDatum(indexId)); + if (!HeapTupleIsValid(indexTuple)) + elog(ERROR, "cache lookup failed for index %u", indexId); + indexForm = (Form_pg_index) GETSTRUCT(indexTuple); + + /* + * This is only a safety check, just to avoid live indexes from being + * dropped. + */ + if (indexForm->indislive) + elog(ERROR, "cannot drop live index with OID %u", indexId); + + /* Clean up */ + table_close(pg_index, RowExclusiveLock); + + /* + * We are sure to have a dead index, so begin the drop process. Register + * constraint or index for drop. + */ + if (OidIsValid(constraintOid)) + { + object.classId = ConstraintRelationId; + object.objectId = constraintOid; + } + else + { + object.classId = RelationRelationId; + object.objectId = indexId; + } + + object.objectSubId = 0; + + /* Perform deletion for normal and toast indexes */ + performDeletion(&object, DROP_RESTRICT, 0); +} + /* * index_constraint_create * @@ -1590,36 +2089,8 @@ index_drop(Oid indexId, bool concurrent) */ WaitForLockers(heaplocktag, AccessExclusiveLock); - /* - * No more predicate locks will be acquired on this index, and we're - * about to stop doing inserts into the index which could show - * conflicts with existing predicate locks, so now is the time to move - * them to the heap relation. - */ - userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock); - userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock); - TransferPredicateLocksToHeapRelation(userIndexRelation); - - /* - * Now we are sure that nobody uses the index for queries; they just - * might have it open for updating it. So now we can unset indisready - * and indislive, then wait till nobody could be using it at all - * anymore. 
- */ - index_set_state_flags(indexId, INDEX_DROP_SET_DEAD); - - /* - * Invalidate the relcache for the table, so that after this commit - * all sessions will refresh the table's index list. Forgetting just - * the index's relcache entry is not enough. - */ - CacheInvalidateRelcache(userHeapRelation); - - /* - * Close the relations again, though still holding session lock. - */ - table_close(userHeapRelation, NoLock); - index_close(userIndexRelation, NoLock); + /* Finish invalidation of index and mark it as dead */ + index_concurrently_set_dead(heapId, indexId); /* * Again, commit the transaction to make the pg_index update visible diff --git a/src/backend/catalog/pg_depend.c b/src/backend/catalog/pg_depend.c index 2b8f651c99..969b34e752 100644 --- a/src/backend/catalog/pg_depend.c +++ b/src/backend/catalog/pg_depend.c @@ -375,6 +375,94 @@ changeDependencyFor(Oid classId, Oid objectId, return count; } +/* + * Adjust all dependency records to point to a different object of the same type + * + * refClassId/oldRefObjectId specify the old referenced object. + * newRefObjectId is the new referenced object (must be of class refClassId). + * + * Returns the number of records updated. + */ +long +changeDependenciesOn(Oid refClassId, Oid oldRefObjectId, + Oid newRefObjectId) +{ + long count = 0; + Relation depRel; + ScanKeyData key[2]; + SysScanDesc scan; + HeapTuple tup; + ObjectAddress objAddr; + bool newIsPinned; + + depRel = heap_open(DependRelationId, RowExclusiveLock); + + /* + * If oldRefObjectId is pinned, there won't be any dependency entries on + * it --- we can't cope in that case. (This isn't really worth expending + * code to fix, in current usage; it just means you can't rename stuff out + * of pg_catalog, which would likely be a bad move anyway.) 
+ */ + objAddr.classId = refClassId; + objAddr.objectId = oldRefObjectId; + objAddr.objectSubId = 0; + + if (isObjectPinned(&objAddr, depRel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot remove dependency on %s because it is a system object", + getObjectDescription(&objAddr)))); + + /* + * We can handle adding a dependency on something pinned, though, since + * that just means deleting the dependency entry. + */ + objAddr.objectId = newRefObjectId; + + newIsPinned = isObjectPinned(&objAddr, depRel); + + /* Now search for dependency records */ + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(refClassId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(oldRefObjectId)); + + scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 2, key); + + while (HeapTupleIsValid((tup = systable_getnext(scan)))) + { + Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup); + + if (newIsPinned) + CatalogTupleDelete(depRel, &tup->t_self); + else + { + /* make a modifiable copy */ + tup = heap_copytuple(tup); + depform = (Form_pg_depend) GETSTRUCT(tup); + + depform->refobjid = newRefObjectId; + + CatalogTupleUpdate(depRel, &tup->t_self, tup); + + heap_freetuple(tup); + } + + count++; + } + + systable_endscan(scan); + + table_close(depRel, RowExclusiveLock); + + return count; +} + /* * isObjectPinned() * @@ -734,3 +822,58 @@ get_index_constraint(Oid indexId) return constraintId; } + +/* + * get_index_ref_constraints + * Given the OID of an index, return the OID of all foreign key + * constraints which reference the index. 
+ */ +List * +get_index_ref_constraints(Oid indexId) +{ + List *result = NIL; + Relation depRel; + ScanKeyData key[3]; + SysScanDesc scan; + HeapTuple tup; + + /* Search the dependency table for the index */ + depRel = heap_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(indexId)); + ScanKeyInit(&key[2], + Anum_pg_depend_refobjsubid, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(0)); + + scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 3, key); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup); + + /* + * We assume any normal dependency from a constraint must be what we + * are looking for. + */ + if (deprec->classid == ConstraintRelationId && + deprec->objsubid == 0 && + deprec->deptype == DEPENDENCY_NORMAL) + { + result = lappend_oid(result, deprec->objid); + } + } + + systable_endscan(scan); + table_close(depRel, AccessShareLock); + + return result; +} diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 77be19175a..fb93c41c88 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -336,7 +336,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, list_make2("chunk_id", "chunk_seq"), BTREE_AM_OID, rel->rd_rel->reltablespace, - collationObjectId, classObjectId, coloptions, (Datum) 0, + collationObjectId, classObjectId, coloptions, (Datum) 0, NULL, INDEX_CREATE_IS_PRIMARY, 0, true, true, NULL); table_close(toast_rel, NoLock); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 5b2b8d2969..2b4971fc93 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -59,6 +59,7 @@ #include "utils/lsyscache.h" #include 
"utils/memutils.h" #include "utils/partcache.h" +#include "utils/pg_rusage.h" #include "utils/regproc.h" #include "utils/snapmgr.h" #include "utils/syscache.h" @@ -84,6 +85,7 @@ static char *ChooseIndexNameAddition(List *colnames); static List *ChooseIndexColumnNames(List *indexElems); static void RangeVarCallbackForReindexIndex(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg); +static bool ReindexRelationConcurrently(Oid relationOid, int options); static void ReindexPartitionedIndex(Relation parentIdx); /* @@ -298,6 +300,90 @@ CheckIndexCompatible(Oid oldId, return ret; } + +/* + * WaitForOlderSnapshots + * + * Wait for transactions that might have an older snapshot than the given xmin + * limit, because it might not contain tuples deleted just before it has + * been taken. Obtain a list of VXIDs of such transactions, and wait for them + * individually. + * + * We can exclude any running transactions that have xmin > the xmin given; + * their oldest snapshot must be newer than our xmin limit. + * We can also exclude any transactions that have xmin = zero, since they + * evidently have no live snapshot at all (and any one they might be in + * process of taking is certainly newer than ours). Transactions in other + * DBs can be ignored too, since they'll never even be able to see this + * index. + * + * We can also exclude autovacuum processes and processes running manual + * lazy VACUUMs, because they won't be fazed by missing index entries + * either. (Manual ANALYZEs, however, can't be excluded because they + * might be within transactions that are going to do arbitrary operations + * later.) + * + * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not + * check for that. + * + * If a process goes idle-in-transaction with xmin zero, we do not need to + * wait for it anymore, per the above argument. 
We do not have the + * infrastructure right now to stop waiting if that happens, but we can at + * least avoid the folly of waiting when it is idle at the time we would + * begin to wait. We do this by repeatedly rechecking the output of + * GetCurrentVirtualXIDs. If, during any iteration, a particular vxid + * doesn't show up in the output, we know we can forget about it. + */ +static void +WaitForOlderSnapshots(TransactionId limitXmin) +{ + int i, + n_old_snapshots; + VirtualTransactionId *old_snapshots; + + old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false, + PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, + &n_old_snapshots); + + for (i = 0; i < n_old_snapshots; i++) + { + if (!VirtualTransactionIdIsValid(old_snapshots[i])) + continue; /* found uninteresting in previous cycle */ + + if (i > 0) + { + /* see if anything's changed ... */ + VirtualTransactionId *newer_snapshots; + int n_newer_snapshots; + int j; + int k; + + newer_snapshots = GetCurrentVirtualXIDs(limitXmin, + true, false, + PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, + &n_newer_snapshots); + for (j = i; j < n_old_snapshots; j++) + { + if (!VirtualTransactionIdIsValid(old_snapshots[j])) + continue; /* found uninteresting in previous cycle */ + for (k = 0; k < n_newer_snapshots; k++) + { + if (VirtualTransactionIdEquals(old_snapshots[j], + newer_snapshots[k])) + break; + } + if (k >= n_newer_snapshots) /* not there anymore */ + SetInvalidVirtualTransactionId(old_snapshots[j]); + } + pfree(newer_snapshots); + } + + if (VirtualTransactionIdIsValid(old_snapshots[i])) + VirtualXactLock(old_snapshots[i], true); + } +} + + /* * DefineIndex * Creates a new index. 
@@ -346,7 +432,6 @@ DefineIndex(Oid relationId, List *indexColNames; List *allIndexParams; Relation rel; - Relation indexRelation; HeapTuple tuple; Form_pg_am accessMethodForm; IndexAmRoutine *amRoutine; @@ -361,9 +446,7 @@ DefineIndex(Oid relationId, int numberOfAttributes; int numberOfKeyAttributes; TransactionId limitXmin; - VirtualTransactionId *old_snapshots; ObjectAddress address; - int n_old_snapshots; LockRelId heaprelid; LOCKTAG heaplocktag; LOCKMODE lockmode; @@ -856,7 +939,7 @@ DefineIndex(Oid relationId, stmt->oldNode, indexInfo, indexColNames, accessMethodId, tablespaceId, collationObjectId, classObjectId, - coloptions, reloptions, + coloptions, reloptions, NULL, flags, constr_flags, allowSystemTableMods, !check_rights, &createdConstraintId); @@ -1152,34 +1235,14 @@ DefineIndex(Oid relationId, * HOT-chain or the extension of the chain is HOT-safe for this index. */ - /* Open and lock the parent heap relation */ - rel = table_open(relationId, ShareUpdateExclusiveLock); - - /* And the target index relation */ - indexRelation = index_open(indexRelationId, RowExclusiveLock); - /* Set ActiveSnapshot since functions in the indexes may need it */ PushActiveSnapshot(GetTransactionSnapshot()); - /* We have to re-build the IndexInfo struct, since it was lost in commit */ - indexInfo = BuildIndexInfo(indexRelation); - Assert(!indexInfo->ii_ReadyForInserts); - indexInfo->ii_Concurrent = true; - indexInfo->ii_BrokenHotChain = false; - - /* Now build the index */ - index_build(rel, indexRelation, indexInfo, false, true); - - /* Close both the relations, but keep the locks */ - table_close(rel, NoLock); - index_close(indexRelation, NoLock); - - /* - * Update the pg_index row to mark the index as ready for inserts. Once we - * commit this transaction, any new transactions that open the table must - * insert new entries into the index for insertions and non-HOT updates. 
- */ - index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY); + /* + * Perform concurrent build of index. Use the relation OID passed to + * DefineIndex, as the code this replaces did, not a second name lookup. + */ + index_concurrently_build(relationId, indexRelationId); /* we can do away with our snapshot */ PopActiveSnapshot(); @@ -1251,74 +1314,9 @@ DefineIndex(Oid relationId, * The index is now valid in the sense that it contains all currently * interesting tuples. But since it might not contain tuples deleted just * before the reference snap was taken, we have to wait out any - * transactions that might have older snapshots. Obtain a list of VXIDs - * of such transactions, and wait for them individually. - * - * We can exclude any running transactions that have xmin > the xmin of - * our reference snapshot; their oldest snapshot must be newer than ours. - * We can also exclude any transactions that have xmin = zero, since they - * evidently have no live snapshot at all (and any one they might be in - * process of taking is certainly newer than ours). Transactions in other - * DBs can be ignored too, since they'll never even be able to see this - * index. - * - * We can also exclude autovacuum processes and processes running manual - * lazy VACUUMs, because they won't be fazed by missing index entries - * either. (Manual ANALYZEs, however, can't be excluded because they - * might be within transactions that are going to do arbitrary operations - * later.) - * - * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not - * check for that. - * - * If a process goes idle-in-transaction with xmin zero, we do not need to - * wait for it anymore, per the above argument. We do not have the - * infrastructure right now to stop waiting if that happens, but we can at - * least avoid the folly of waiting when it is idle at the time we would - * begin to wait. We do this by repeatedly rechecking the output of - * GetCurrentVirtualXIDs. 
If, during any iteration, a particular vxid - * doesn't show up in the output, we know we can forget about it. + * transactions that might have older snapshots. */ - old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false, - PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, - &n_old_snapshots); - - for (i = 0; i < n_old_snapshots; i++) - { - if (!VirtualTransactionIdIsValid(old_snapshots[i])) - continue; /* found uninteresting in previous cycle */ - - if (i > 0) - { - /* see if anything's changed ... */ - VirtualTransactionId *newer_snapshots; - int n_newer_snapshots; - int j; - int k; - - newer_snapshots = GetCurrentVirtualXIDs(limitXmin, - true, false, - PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, - &n_newer_snapshots); - for (j = i; j < n_old_snapshots; j++) - { - if (!VirtualTransactionIdIsValid(old_snapshots[j])) - continue; /* found uninteresting in previous cycle */ - for (k = 0; k < n_newer_snapshots; k++) - { - if (VirtualTransactionIdEquals(old_snapshots[j], - newer_snapshots[k])) - break; - } - if (k >= n_newer_snapshots) /* not there anymore */ - SetInvalidVirtualTransactionId(old_snapshots[j]); - } - pfree(newer_snapshots); - } - - if (VirtualTransactionIdIsValid(old_snapshots[i])) - VirtualXactLock(old_snapshots[i], true); - } + WaitForOlderSnapshots(limitXmin); /* * Index can now be marked valid -- update its pg_index entry @@ -2204,7 +2202,7 @@ ChooseIndexColumnNames(List *indexElems) * Recreate a specific index. */ void -ReindexIndex(RangeVar *indexRelation, int options) +ReindexIndex(RangeVar *indexRelation, int options, bool concurrent) { Oid indOid; Oid heapOid = InvalidOid; @@ -2216,7 +2214,8 @@ ReindexIndex(RangeVar *indexRelation, int options) * obtain lock on table first, to avoid deadlock hazard. The lock level * used here must match the index lock obtained in reindex_index(). */ - indOid = RangeVarGetRelidExtended(indexRelation, AccessExclusiveLock, + indOid = RangeVarGetRelidExtended(indexRelation, + concurrent ? 
ShareUpdateExclusiveLock : AccessExclusiveLock, 0, RangeVarCallbackForReindexIndex, (void *) &heapOid); @@ -2236,7 +2235,10 @@ ReindexIndex(RangeVar *indexRelation, int options) persistence = irel->rd_rel->relpersistence; index_close(irel, NoLock); - reindex_index(indOid, false, persistence, options); + if (concurrent) + ReindexRelationConcurrently(indOid, options); + else + reindex_index(indOid, false, persistence, options); } /* @@ -2304,18 +2306,26 @@ RangeVarCallbackForReindexIndex(const RangeVar *relation, * Recreate all indexes of a table (and of its toast table, if any) */ Oid -ReindexTable(RangeVar *relation, int options) +ReindexTable(RangeVar *relation, int options, bool concurrent) { Oid heapOid; + bool result; /* The lock level used here should match reindex_relation(). */ - heapOid = RangeVarGetRelidExtended(relation, ShareLock, 0, + heapOid = RangeVarGetRelidExtended(relation, + concurrent ? ShareUpdateExclusiveLock : ShareLock, + 0, RangeVarCallbackOwnsTable, NULL); - if (!reindex_relation(heapOid, - REINDEX_REL_PROCESS_TOAST | - REINDEX_REL_CHECK_CONSTRAINTS, - options)) + if (concurrent) + result = ReindexRelationConcurrently(heapOid, options); + else + result = reindex_relation(heapOid, + REINDEX_REL_PROCESS_TOAST | + REINDEX_REL_CHECK_CONSTRAINTS, + options); + + if (!result) ereport(NOTICE, (errmsg("table \"%s\" has no indexes", relation->relname))); @@ -2333,7 +2343,7 @@ ReindexTable(RangeVar *relation, int options) */ void ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, - int options) + int options, bool concurrent) { Oid objectOid; Relation relationRelation; @@ -2345,6 +2355,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, List *relids = NIL; ListCell *l; int num_keys; + bool concurrent_warning = false; AssertArg(objectName); Assert(objectKind == REINDEX_OBJECT_SCHEMA || @@ -2453,6 +2464,20 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, 
!pg_class_ownercheck(relid, GetUserId())) continue; + /* + * Skip system tables that index_create() would reject to index + * concurrently. + */ + if (concurrent && IsSystemNamespace(get_rel_namespace(relid))) + { + if (!concurrent_warning) + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("concurrent reindex is not supported for catalog relations, skipping all"))); + concurrent_warning = true; + continue; + } + /* Save the list of relation OIDs in private context */ old = MemoryContextSwitchTo(private_context); @@ -2479,26 +2504,661 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, foreach(l, relids) { Oid relid = lfirst_oid(l); + bool result; StartTransactionCommand(); /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); - if (reindex_relation(relid, - REINDEX_REL_PROCESS_TOAST | - REINDEX_REL_CHECK_CONSTRAINTS, - options)) - if (options & REINDEXOPT_VERBOSE) + if (concurrent) + { + result = ReindexRelationConcurrently(relid, options); + /* ReindexRelationConcurrently() does the verbose output */ + + PushActiveSnapshot(GetTransactionSnapshot()); + } + else + { + result = reindex_relation(relid, + REINDEX_REL_PROCESS_TOAST | + REINDEX_REL_CHECK_CONSTRAINTS, + options); + + if (result && (options & REINDEXOPT_VERBOSE)) ereport(INFO, (errmsg("table \"%s.%s\" was reindexed", get_namespace_name(get_rel_namespace(relid)), get_rel_name(relid)))); + } + + PopActiveSnapshot(); + CommitTransactionCommand(); + } + StartTransactionCommand(); + + MemoryContextDelete(private_context); +} + + +/* + * ReindexRelationConcurrently + * + * Process REINDEX CONCURRENTLY for given relation Oid. The relation can be + * either an index or a table. If a table is specified, each phase is processed + * one by one for each table's indexes as well as its dependent toast indexes + * if this table has a toast relation defined. 
+ */ +static bool +ReindexRelationConcurrently(Oid relationOid, int options) +{ + List *parentRelationIds = NIL; + List *indexIds = NIL; + List *newIndexIds = NIL; + List *relationLocks = NIL; + List *lockTags = NIL; + ListCell *lc, + *lc2; + MemoryContext private_context; + MemoryContext old; + char relkind; + char *relationName = NULL; + char *relationNamespace = NULL; + PGRUsage ru0; + + /* + * Create a memory context that will survive forced transaction commits we + * do below. Since it is a child of PortalContext, it will go away + * eventually even if we suffer an error; there's no need for special + * abort cleanup logic. + */ + private_context = AllocSetContextCreate(PortalContext, + "ReindexConcurrent", + ALLOCSET_SMALL_SIZES); + + if (options & REINDEXOPT_VERBOSE) + { + /* Save data needed by REINDEX VERBOSE in private context */ + old = MemoryContextSwitchTo(private_context); + + relationName = get_rel_name(relationOid); + relationNamespace = get_namespace_name(get_rel_namespace(relationOid)); + + pg_rusage_init(&ru0); + + MemoryContextSwitchTo(old); + } + + relkind = get_rel_relkind(relationOid); + + /* + * Extract the list of indexes that are going to be rebuilt based on the + * list of relation Oids given by caller. For each element in given list, + * if the relkind of given relation Oid is a table, all its valid indexes + * will be rebuilt, including its associated toast table indexes. If + * relkind is an index, this index itself will be rebuilt. The locks taken + * on parent relations and involved indexes are kept until this + * transaction is committed to protect against schema changes that might + * occur until the session lock is taken on each relation, session lock + * used to similarly protect from any schema change that could happen + * within the multiple transactions that are used during this process. 
+ */ + switch (relkind) + { + case RELKIND_RELATION: + case RELKIND_MATVIEW: + case RELKIND_TOASTVALUE: + { + /* + * In the case of a relation, find all its indexes including + * toast indexes. + */ + Relation heapRelation; + + /* Save the list of relation OIDs in private context */ + old = MemoryContextSwitchTo(private_context); + + /* Track this relation for session locks */ + parentRelationIds = lappend_oid(parentRelationIds, relationOid); + + MemoryContextSwitchTo(old); + + /* A shared relation cannot be reindexed concurrently */ + if (IsSharedRelation(relationOid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("concurrent reindex is not supported for shared relations"))); + + /* A system catalog cannot be reindexed concurrently */ + if (IsSystemNamespace(get_rel_namespace(relationOid))) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("concurrent reindex is not supported for catalog relations"))); + + /* Open relation to get its indexes */ + heapRelation = table_open(relationOid, ShareUpdateExclusiveLock); + + /* Add all the valid indexes of relation to list */ + foreach(lc, RelationGetIndexList(heapRelation)) + { + Oid cellOid = lfirst_oid(lc); + Relation indexRelation = index_open(cellOid, + ShareUpdateExclusiveLock); + + if (!indexRelation->rd_index->indisvalid) + ereport(WARNING, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("cannot reindex concurrently invalid index \"%s.%s\", skipping", + get_namespace_name(get_rel_namespace(cellOid)), + get_rel_name(cellOid)))); + else + { + /* Save the list of relation OIDs in private context */ + old = MemoryContextSwitchTo(private_context); + + indexIds = lappend_oid(indexIds, cellOid); + + MemoryContextSwitchTo(old); + } + + index_close(indexRelation, NoLock); + } + + /* Also add the toast indexes */ + if (OidIsValid(heapRelation->rd_rel->reltoastrelid)) + { + Oid toastOid = heapRelation->rd_rel->reltoastrelid; + Relation toastRelation = table_open(toastOid, + 
ShareUpdateExclusiveLock); + + /* Save the list of relation OIDs in private context */ + old = MemoryContextSwitchTo(private_context); + + /* Track this relation for session locks */ + parentRelationIds = lappend_oid(parentRelationIds, toastOid); + + MemoryContextSwitchTo(old); + + foreach(lc2, RelationGetIndexList(toastRelation)) + { + Oid cellOid = lfirst_oid(lc2); + Relation indexRelation = index_open(cellOid, + ShareUpdateExclusiveLock); + + if (!indexRelation->rd_index->indisvalid) + ereport(WARNING, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("cannot reindex concurrently invalid index \"%s.%s\", skipping", + get_namespace_name(get_rel_namespace(cellOid)), + get_rel_name(cellOid)))); + else + { + /* + * Save the list of relation OIDs in private + * context + */ + old = MemoryContextSwitchTo(private_context); + + indexIds = lappend_oid(indexIds, cellOid); + + MemoryContextSwitchTo(old); + } + + index_close(indexRelation, NoLock); + } + + table_close(toastRelation, NoLock); + } + + table_close(heapRelation, NoLock); + break; + } + case RELKIND_INDEX: + { + /* + * For an index simply add its Oid to list. Invalid indexes + * cannot be included in list. 
+ */ + Relation indexRelation = index_open(relationOid, ShareUpdateExclusiveLock); + Oid parentOid = IndexGetRelation(relationOid, false); + + /* A shared relation cannot be reindexed concurrently */ + if (IsSharedRelation(parentOid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("concurrent reindex is not supported for shared relations"))); + + /* A system catalog cannot be reindexed concurrently */ + if (IsSystemNamespace(get_rel_namespace(parentOid))) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("concurrent reindex is not supported for catalog relations"))); + + /* Save the list of relation OIDs in private context */ + old = MemoryContextSwitchTo(private_context); + + /* Track the parent relation of this index for session locks */ + parentRelationIds = list_make1_oid(parentOid); + + MemoryContextSwitchTo(old); + + if (!indexRelation->rd_index->indisvalid) + ereport(WARNING, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("cannot reindex concurrently invalid index \"%s.%s\", skipping", + get_namespace_name(get_rel_namespace(relationOid)), + get_rel_name(relationOid)))); + else + { + /* Save the list of relation OIDs in private context */ + old = MemoryContextSwitchTo(private_context); + + indexIds = lappend_oid(indexIds, relationOid); + + MemoryContextSwitchTo(old); + } + + index_close(indexRelation, NoLock); + break; + } + case RELKIND_PARTITIONED_TABLE: + /* see reindex_relation() */ + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"", + get_rel_name(relationOid)))); + return false; + default: + /* Return error if type of relation is not supported */ + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot reindex concurrently this type of relation"))); + break; + } + + /* Definitely no indexes, so leave */ + if (indexIds == NIL) + return false; + + Assert(parentRelationIds != NIL); + + /* + * Phase 1 of REINDEX 
CONCURRENTLY + * + * Here begins the process for concurrently rebuilding the index entries. + * We need first to create an index which is based on the same data as the + * former index except that it will be only registered in catalogs and + * will be built later. It is possible to perform all the operations on + * all the indexes at the same time for a parent relation including + * indexes for its toast relation. + */ + + /* Do the concurrent index creation for each index */ + foreach(lc, indexIds) + { + char *concurrentName; + Oid indOid = lfirst_oid(lc); + Oid concurrentOid = InvalidOid; + Relation indexRel, + indexParentRel, + indexConcurrentRel; + LockRelId lockrelid; + + indexRel = index_open(indOid, ShareUpdateExclusiveLock); + /* Open the index parent relation, might be a toast or parent relation */ + indexParentRel = table_open(indexRel->rd_index->indrelid, + ShareUpdateExclusiveLock); + + /* Choose a temporary relation name for the new index */ + concurrentName = ChooseRelationName(get_rel_name(indOid), + NULL, + "ccnew", + get_rel_namespace(indexRel->rd_index->indrelid), + false); + + /* Create new index definition based on given index */ + concurrentOid = index_concurrently_create_copy(indexParentRel, + indOid, + concurrentName); + + /* Now open the relation of the new index, a lock is also needed on it */ + indexConcurrentRel = index_open(concurrentOid, ShareUpdateExclusiveLock); + + /* Save the list of oids and locks in private context */ + old = MemoryContextSwitchTo(private_context); + + /* Save the new index Oid */ + newIndexIds = lappend_oid(newIndexIds, concurrentOid); + + /* + * Save lockrelid to protect each relation from drop then close + * relations. The lockrelid on parent relation is not taken here to + * avoid multiple locks taken on the same relation, instead we rely on + * parentRelationIds built earlier. 
+ */ + lockrelid = indexRel->rd_lockInfo.lockRelId; /* scratch; each list cell gets its own palloc'd copy */ + relationLocks = lappend(relationLocks, memcpy(palloc(sizeof(LockRelId)), &lockrelid, sizeof(LockRelId))); + lockrelid = indexConcurrentRel->rd_lockInfo.lockRelId; + relationLocks = lappend(relationLocks, memcpy(palloc(sizeof(LockRelId)), &lockrelid, sizeof(LockRelId))); + + MemoryContextSwitchTo(old); + + index_close(indexRel, NoLock); + index_close(indexConcurrentRel, NoLock); + table_close(indexParentRel, NoLock); + } + + /* + * Save the heap lock for following visibility checks with other backends + * that might conflict with this session. + */ + foreach(lc, parentRelationIds) + { + Relation heapRelation = table_open(lfirst_oid(lc), ShareUpdateExclusiveLock); + LockRelId lockrelid = heapRelation->rd_lockInfo.lockRelId; + LOCKTAG *heaplocktag; + + /* Save the list of locks in private context */ + old = MemoryContextSwitchTo(private_context); + + /* Add a palloc'd copy of the parent's lockrelid to the locked relations (the local dies with this iteration) */ + relationLocks = lappend(relationLocks, memcpy(palloc(sizeof(LockRelId)), &lockrelid, sizeof(LockRelId))); + + heaplocktag = (LOCKTAG *) palloc(sizeof(LOCKTAG)); + + /* Save the LOCKTAG for this parent relation for the wait phase */ + SET_LOCKTAG_RELATION(*heaplocktag, lockrelid.dbId, lockrelid.relId); + lockTags = lappend(lockTags, heaplocktag); + + MemoryContextSwitchTo(old); + + /* Close heap relation */ + table_close(heapRelation, NoLock); + } + + /* + * For a concurrent build, it is necessary to make the catalog entries + * visible to the other transactions before actually building the index. + * This will prevent them from making incompatible HOT updates. The index + * is marked as not ready and invalid so that no other transactions will try + * to use it for INSERT or SELECT. + * + * Before committing, get a session level lock on the relation, the + * concurrent index and its copy to ensure that none of them are dropped + * until the operation is done. 
+ */ + foreach(lc, relationLocks) + { + LockRelId lockRel = *((LockRelId *) lfirst(lc)); + + LockRelationIdForSession(&lockRel, ShareUpdateExclusiveLock); + } + + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* + * Phase 2 of REINDEX CONCURRENTLY + * + * Build the new indexes in a separate transaction for each index to avoid + * having open transactions for an unnecessary long time. A concurrent + * build is done for each index that will replace the old indexes. Before + * doing that, we need to wait on the parent relations until no running + * transactions could have the parent table of index open. + */ + + /* Perform a wait on all the session locks */ + StartTransactionCommand(); + WaitForLockersMultiple(lockTags, ShareLock); + CommitTransactionCommand(); + + forboth(lc, indexIds, lc2, newIndexIds) + { + Relation indexRel; + Oid indOid = lfirst_oid(lc); + Oid concurrentOid = lfirst_oid(lc2); + Oid tableOid; + + CHECK_FOR_INTERRUPTS(); + + /* Start new transaction for this index's concurrent build */ + StartTransactionCommand(); + + /* Set ActiveSnapshot since functions in the indexes may need it */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* + * Index relation has been closed by previous commit, so reopen it to + * get its information. + */ + indexRel = index_open(indOid, ShareUpdateExclusiveLock); + tableOid = indexRel->rd_index->indrelid; + index_close(indexRel, NoLock); + + /* Perform concurrent build of new index */ + index_concurrently_build(tableOid, concurrentOid); + + /* We can do away with our snapshot */ + PopActiveSnapshot(); + + /* + * Commit this transaction to make the indisready update visible for + * concurrent index. + */ + CommitTransactionCommand(); + } + + /* + * Phase 3 of REINDEX CONCURRENTLY + * + * During this phase the old indexes catch up with any new tuples that + * were created during the previous phase. 
+ * + * We once again wait until no transaction can have the table open with + * the index marked as read-only for updates. Each index validation is + * done in a separate transaction to minimize how long we hold an open + * transaction. + */ + + /* Perform a wait on all the session locks */ + StartTransactionCommand(); + WaitForLockersMultiple(lockTags, ShareLock); + CommitTransactionCommand(); + + /* + * Scan the heap for each new index, then insert any missing index + * entries. + */ + foreach(lc, newIndexIds) + { + Oid indOid = lfirst_oid(lc); + Oid relOid; + TransactionId limitXmin; + Snapshot snapshot; + + CHECK_FOR_INTERRUPTS(); + + /* Open separate transaction to validate index */ + StartTransactionCommand(); + + /* Get the parent relation Oid */ + relOid = IndexGetRelation(indOid, false); + + /* + * Take the reference snapshot that will be used for the old indexes + * validation. + */ + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + PushActiveSnapshot(snapshot); + + /* Validate index, which might be a toast */ + validate_index(relOid, indOid, snapshot); + + /* + * We can now do away with our active snapshot, we still need to save + * the xmin limit to wait for older snapshots. + */ + limitXmin = snapshot->xmin; PopActiveSnapshot(); + + /* And we can remove the validating snapshot too */ + UnregisterSnapshot(snapshot); + + /* + * This new index is now valid as it contains all the tuples + * necessary. However, it might not have taken into account deleted + * tuples before the reference snapshot was taken, so we need to wait + * for the transactions that might have older snapshots than ours. + */ + WaitForOlderSnapshots(limitXmin); + + /* Commit this transaction now that the new index is valid */ CommitTransactionCommand(); } + + /* + * Phase 4 of REINDEX CONCURRENTLY + * + * Now that the new indexes have been validated, it is necessary to swap + * each new index with its corresponding old index. 
+ * + * We mark the new indexes as valid and the old indexes dead at the same + * time to make sure we only get constraint violations from the indexes + * with the correct names. + */ + + StartTransactionCommand(); + + forboth(lc, indexIds, lc2, newIndexIds) + { + char *oldName; + Oid indOid = lfirst_oid(lc); + Oid concurrentOid = lfirst_oid(lc2); + Oid relOid; + + CHECK_FOR_INTERRUPTS(); + + relOid = IndexGetRelation(indOid, false); + + /* Choose a relation name for old index */ + oldName = ChooseRelationName(get_rel_name(indOid), + NULL, + "ccold", + get_rel_namespace(relOid), + false); + + /* Swap old index with the new one */ + index_concurrently_swap(concurrentOid, indOid, oldName); + + /* + * Invalidate the relcache for the table, so that after this commit + * all sessions will refresh any cached plans that might reference the + * index. + */ + CacheInvalidateRelcacheByRelid(relOid); + + /* + * CCI here so that subsequent iterations see the oldName in the + * catalog and can choose a nonconflicting name for their oldName. + * Otherwise, this could lead to conflicts if a table has two indexes + * whose names are equal for the first NAMEDATALEN-minus-a-few + * characters. + */ + CommandCounterIncrement(); + } + + /* Commit this transaction and make index swaps visible */ + CommitTransactionCommand(); + + /* + * Phase 5 of REINDEX CONCURRENTLY + * + * Mark the old indexes as dead so they can later be dropped. + * + * Note that it is necessary to wait for virtual locks on the parent + * relation before setting the index as dead. + */ + + /* Perform a wait on all the session locks */ + StartTransactionCommand(); + WaitForLockersMultiple(lockTags, AccessExclusiveLock); + + foreach(lc, indexIds) + { + Oid indOid = lfirst_oid(lc); + Oid relOid; + + CHECK_FOR_INTERRUPTS(); + + relOid = IndexGetRelation(indOid, false); + + /* Finish the index invalidation and set it as dead. 
*/ + index_concurrently_set_dead(relOid, indOid); + } + + /* Commit this transaction to make the updates visible. */ + CommitTransactionCommand(); + + /* + * Phase 6 of REINDEX CONCURRENTLY + * + * Drop the old indexes, with actually the same code path as DROP INDEX + * CONCURRENTLY. This is safe as all the old entries are already + * considered as invalid and not ready, so they will not be used by other + * backends for any read or write operations. + */ + + /* Perform a wait on all the session locks */ + StartTransactionCommand(); + WaitForLockersMultiple(lockTags, AccessExclusiveLock); + + /* Get fresh snapshot for next step */ + PushActiveSnapshot(GetTransactionSnapshot()); + + foreach(lc, indexIds) + { + Oid indOid = lfirst_oid(lc); + + CHECK_FOR_INTERRUPTS(); + + index_concurrently_drop(indOid); + } + + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* + * Finally, release every session-level lock recorded in relationLocks. + */ + foreach(lc, relationLocks) + { + LockRelId lockRel = *((LockRelId *) lfirst(lc)); + + UnlockRelationIdForSession(&lockRel, ShareUpdateExclusiveLock); + } + + /* Log what we did */ + if (options & REINDEXOPT_VERBOSE) + { + if (relkind == RELKIND_INDEX) + ereport(INFO, + (errmsg("index \"%s.%s\" was reindexed", + relationNamespace, relationName), + errdetail("%s.", + pg_rusage_show(&ru0)))); + else + ereport(INFO, + (errmsg("table \"%s.%s\" was reindexed", + relationNamespace, relationName), + errdetail("%s.", + pg_rusage_show(&ru0)))); + } + + /* Start a new transaction to finish process properly */ + StartTransactionCommand(); + + MemoryContextDelete(private_context); + + return true; } /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index ff76499137..ec3df34943 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -1257,6 +1257,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, bool is_partition; Form_pg_class classform; LOCKMODE 
heap_lockmode; + bool invalid_system_index = false; state = (struct DropRelationCallbackState *) arg; relkind = state->relkind; @@ -1319,7 +1320,36 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relOid)), rel->relname); - if (!allowSystemTableMods && IsSystemClass(relOid, classform)) + /* + * Check the case of a system index that might have been invalidated by a + * failed concurrent process and allow its drop. For the time being, this + * only concerns indexes of toast relations that became invalid during a + * REINDEX CONCURRENTLY process. + */ + if (IsSystemClass(relOid, classform) && relkind == RELKIND_INDEX) + { + HeapTuple locTuple; + Form_pg_index indexform; + bool indisvalid; + + locTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(relOid)); + if (!HeapTupleIsValid(locTuple)) + { + ReleaseSysCache(tuple); + return; + } + + indexform = (Form_pg_index) GETSTRUCT(locTuple); + indisvalid = indexform->indisvalid; + ReleaseSysCache(locTuple); + + /* Mark object as being an invalid index of system catalogs */ + if (!indisvalid) + invalid_system_index = true; + } + + /* In the case of an invalid index, it is fine to bypass this check */ + if (!invalid_system_index && !allowSystemTableMods && IsSystemClass(relOid, classform)) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied: \"%s\" is a system catalog", diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 3eb7e95d64..0ac1205af5 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -4353,6 +4353,7 @@ _copyReindexStmt(const ReindexStmt *from) COPY_NODE_FIELD(relation); COPY_STRING_FIELD(name); COPY_SCALAR_FIELD(options); + COPY_SCALAR_FIELD(concurrent); return newnode; } diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 5c4fa7d077..00874fb9a5 100644 --- a/src/backend/nodes/equalfuncs.c +++ 
b/src/backend/nodes/equalfuncs.c @@ -2098,6 +2098,7 @@ _equalReindexStmt(const ReindexStmt *a, const ReindexStmt *b) COMPARE_NODE_FIELD(relation); COMPARE_STRING_FIELD(name); COMPARE_SCALAR_FIELD(options); + COMPARE_SCALAR_FIELD(concurrent); return true; } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index c1faf4152c..d09043c6e2 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -8266,42 +8266,46 @@ DropTransformStmt: DROP TRANSFORM opt_if_exists FOR Typename LANGUAGE name opt_d * * QUERY: * - * REINDEX [ (options) ] type + * REINDEX [ (options) ] type [CONCURRENTLY] *****************************************************************************/ ReindexStmt: - REINDEX reindex_target_type qualified_name + REINDEX reindex_target_type opt_concurrently qualified_name { ReindexStmt *n = makeNode(ReindexStmt); n->kind = $2; - n->relation = $3; + n->concurrent = $3; + n->relation = $4; n->name = NULL; n->options = 0; $$ = (Node *)n; } - | REINDEX reindex_target_multitable name + | REINDEX reindex_target_multitable opt_concurrently name { ReindexStmt *n = makeNode(ReindexStmt); n->kind = $2; - n->name = $3; + n->concurrent = $3; + n->name = $4; n->relation = NULL; n->options = 0; $$ = (Node *)n; } - | REINDEX '(' reindex_option_list ')' reindex_target_type qualified_name + | REINDEX '(' reindex_option_list ')' reindex_target_type opt_concurrently qualified_name { ReindexStmt *n = makeNode(ReindexStmt); n->kind = $5; - n->relation = $6; + n->concurrent = $6; + n->relation = $7; n->name = NULL; n->options = $3; $$ = (Node *)n; } - | REINDEX '(' reindex_option_list ')' reindex_target_multitable name + | REINDEX '(' reindex_option_list ')' reindex_target_multitable opt_concurrently name { ReindexStmt *n = makeNode(ReindexStmt); n->kind = $5; - n->name = $6; + n->concurrent = $6; + n->name = $7; n->relation = NULL; n->options = $3; $$ = (Node *)n; diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 
6ec795f1b4..9f8f62b5de 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -774,16 +774,20 @@ standard_ProcessUtility(PlannedStmt *pstmt, { ReindexStmt *stmt = (ReindexStmt *) parsetree; + if (stmt->concurrent) + PreventInTransactionBlock(isTopLevel, + "REINDEX CONCURRENTLY"); + /* we choose to allow this during "read only" transactions */ PreventCommandDuringRecovery("REINDEX"); /* forbidden in parallel mode due to CommandIsReadOnly */ switch (stmt->kind) { case REINDEX_OBJECT_INDEX: - ReindexIndex(stmt->relation, stmt->options); + ReindexIndex(stmt->relation, stmt->options, stmt->concurrent); break; case REINDEX_OBJECT_TABLE: - ReindexTable(stmt->relation, stmt->options); + ReindexTable(stmt->relation, stmt->options, stmt->concurrent); break; case REINDEX_OBJECT_SCHEMA: case REINDEX_OBJECT_SYSTEM: @@ -799,7 +803,7 @@ standard_ProcessUtility(PlannedStmt *pstmt, (stmt->kind == REINDEX_OBJECT_SCHEMA) ? "REINDEX SCHEMA" : (stmt->kind == REINDEX_OBJECT_SYSTEM) ? "REINDEX SYSTEM" : "REINDEX DATABASE"); - ReindexMultipleTables(stmt->name, stmt->kind, stmt->options); + ReindexMultipleTables(stmt->name, stmt->kind, stmt->options, stmt->concurrent); break; default: elog(ERROR, "unrecognized object type: %d", diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c index 5d8634d818..82511e34ac 100644 --- a/src/bin/psql/common.c +++ b/src/bin/psql/common.c @@ -2192,6 +2192,22 @@ command_no_begin(const char *query) return true; if (wordlen == 10 && pg_strncasecmp(query, "tablespace", 10) == 0) return true; + if (wordlen == 5 && (pg_strncasecmp(query, "index", 5) == 0 || + pg_strncasecmp(query, "table", 5) == 0)) + { + query += wordlen; + query = skip_white_space(query); + wordlen = 0; + while (isalpha((unsigned char) query[wordlen])) + wordlen += PQmblen(&query[wordlen], pset.encoding); + + /* + * REINDEX [ TABLE | INDEX ] CONCURRENTLY are not allowed in + * xacts. 
+ */ + if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0) + return true; + } /* DROP INDEX CONCURRENTLY isn't allowed in xacts */ if (wordlen == 5 && pg_strncasecmp(query, "index", 5) == 0) diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 7b7a88fda3..da814bfec8 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -3205,12 +3205,24 @@ psql_completion(const char *text, int start, int end) else if (Matches("REINDEX")) COMPLETE_WITH("TABLE", "INDEX", "SYSTEM", "SCHEMA", "DATABASE"); else if (Matches("REINDEX", "TABLE")) - COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexables, NULL); + COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexables, + " UNION SELECT 'CONCURRENTLY'"); else if (Matches("REINDEX", "INDEX")) - COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexes, NULL); + COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexes, + " UNION SELECT 'CONCURRENTLY'"); else if (Matches("REINDEX", "SCHEMA")) - COMPLETE_WITH_QUERY(Query_for_list_of_schemas); + COMPLETE_WITH_QUERY(Query_for_list_of_schemas + " UNION SELECT 'CONCURRENTLY'"); else if (Matches("REINDEX", "SYSTEM|DATABASE")) + COMPLETE_WITH_QUERY(Query_for_list_of_databases + " UNION SELECT 'CONCURRENTLY'"); + else if (Matches("REINDEX", "TABLE", "CONCURRENTLY")) + COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexables, NULL); + else if (Matches("REINDEX", "INDEX", "CONCURRENTLY")) + COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexes, NULL); + else if (Matches("REINDEX", "SCHEMA", "CONCURRENTLY")) + COMPLETE_WITH_QUERY(Query_for_list_of_schemas); + else if (Matches("REINDEX", "SYSTEM|DATABASE", "CONCURRENTLY")) COMPLETE_WITH_QUERY(Query_for_list_of_databases); /* SECURITY LABEL */ diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index 5dea27016e..24b47d4fc6 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -251,6 +251,9 @@ extern long changeDependencyFor(Oid classId, Oid 
objectId, Oid refClassId, Oid oldRefObjectId, Oid newRefObjectId); +extern long changeDependenciesOn(Oid refClassId, Oid oldRefObjectId, + Oid newRefObjectId); + extern Oid getExtensionOfObject(Oid classId, Oid objectId); extern bool sequenceIsOwned(Oid seqId, char deptype, Oid *tableId, int32 *colId); @@ -261,6 +264,8 @@ extern Oid get_constraint_index(Oid constraintId); extern Oid get_index_constraint(Oid indexId); +extern List *get_index_ref_constraints(Oid indexId); + /* in pg_shdepend.c */ extern void recordSharedDependencyOn(ObjectAddress *depender, diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 330c481a8b..84dd900dd6 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -65,6 +65,7 @@ extern Oid index_create(Relation heapRelation, Oid *classObjectId, int16 *coloptions, Datum reloptions, + TupleDesc tupdesc, bits16 flags, bits16 constr_flags, bool allow_system_table_mods, @@ -77,6 +78,22 @@ extern Oid index_create(Relation heapRelation, #define INDEX_CONSTR_CREATE_UPDATE_INDEX (1 << 3) #define INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS (1 << 4) +extern Oid index_concurrently_create_copy(Relation heapRelation, + Oid oldIndexId, + const char *newName); + +extern void index_concurrently_build(Oid heapOid, + Oid indexOid); + +extern void index_concurrently_swap(Oid newIndexId, + Oid oldIndexId, + const char *oldName); + +extern void index_concurrently_set_dead(Oid heapOid, + Oid indexOid); + +extern void index_concurrently_drop(Oid indexId); + extern ObjectAddress index_constraint_create(Relation heapRelation, Oid indexRelationId, Oid parentConstraintId, diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index e592a914a4..e11caf2cd1 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -34,10 +34,10 @@ extern ObjectAddress DefineIndex(Oid relationId, bool check_not_in_use, bool skip_build, bool quiet); -extern void ReindexIndex(RangeVar *indexRelation, int options); 
-extern Oid ReindexTable(RangeVar *relation, int options); +extern void ReindexIndex(RangeVar *indexRelation, int options, bool concurrent); +extern Oid ReindexTable(RangeVar *relation, int options, bool concurrent); extern void ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, - int options); + int options, bool concurrent); extern char *makeObjectName(const char *name1, const char *name2, const char *label); extern char *ChooseRelationName(const char *name1, const char *name2, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4ec8a83541..fd353ed7bd 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -3307,6 +3307,7 @@ typedef struct ReindexStmt RangeVar *relation; /* Table or index to reindex */ const char *name; /* name of database to reindex */ int options; /* Reindex options flags */ + bool concurrent; /* reindex concurrently? */ } ReindexStmt; /* ---------------------- diff --git a/src/test/isolation/expected/reindex-concurrently.out b/src/test/isolation/expected/reindex-concurrently.out new file mode 100644 index 0000000000..9e04169b2f --- /dev/null +++ b/src/test/isolation/expected/reindex-concurrently.out @@ -0,0 +1,78 @@ +Parsed test spec with 3 sessions + +starting permutation: reindex sel1 upd2 ins2 del2 end1 end2 +step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; +step sel1: SELECT data FROM reind_con_tab WHERE id = 3; +data + +aaaa +step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3; +step ins2: INSERT INTO reind_con_tab(data) VALUES ('cccc'); +step del2: DELETE FROM reind_con_tab WHERE data = 'cccc'; +step end1: COMMIT; +step end2: COMMIT; + +starting permutation: sel1 reindex upd2 ins2 del2 end1 end2 +step sel1: SELECT data FROM reind_con_tab WHERE id = 3; +data + +aaaa +step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; +step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3; +step ins2: INSERT INTO reind_con_tab(data) VALUES 
('cccc'); +step del2: DELETE FROM reind_con_tab WHERE data = 'cccc'; +step end1: COMMIT; +step end2: COMMIT; +step reindex: <... completed> + +starting permutation: sel1 upd2 reindex ins2 del2 end1 end2 +step sel1: SELECT data FROM reind_con_tab WHERE id = 3; +data + +aaaa +step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3; +step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; +step ins2: INSERT INTO reind_con_tab(data) VALUES ('cccc'); +step del2: DELETE FROM reind_con_tab WHERE data = 'cccc'; +step end1: COMMIT; +step end2: COMMIT; +step reindex: <... completed> + +starting permutation: sel1 upd2 ins2 reindex del2 end1 end2 +step sel1: SELECT data FROM reind_con_tab WHERE id = 3; +data + +aaaa +step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3; +step ins2: INSERT INTO reind_con_tab(data) VALUES ('cccc'); +step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; +step del2: DELETE FROM reind_con_tab WHERE data = 'cccc'; +step end1: COMMIT; +step end2: COMMIT; +step reindex: <... completed> + +starting permutation: sel1 upd2 ins2 del2 reindex end1 end2 +step sel1: SELECT data FROM reind_con_tab WHERE id = 3; +data + +aaaa +step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3; +step ins2: INSERT INTO reind_con_tab(data) VALUES ('cccc'); +step del2: DELETE FROM reind_con_tab WHERE data = 'cccc'; +step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; +step end1: COMMIT; +step end2: COMMIT; +step reindex: <... completed> + +starting permutation: sel1 upd2 ins2 del2 end1 reindex end2 +step sel1: SELECT data FROM reind_con_tab WHERE id = 3; +data + +aaaa +step upd2: UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3; +step ins2: INSERT INTO reind_con_tab(data) VALUES ('cccc'); +step del2: DELETE FROM reind_con_tab WHERE data = 'cccc'; +step end1: COMMIT; +step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; +step end2: COMMIT; +step reindex: <... 
completed> diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 91d9d90135..e32886bacb 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -42,6 +42,7 @@ test: multixact-no-forget test: lock-committed-update test: lock-committed-keyupdate test: update-locked-tuple +test: reindex-concurrently test: propagate-lock-delete test: tuplelock-conflict test: tuplelock-update diff --git a/src/test/isolation/specs/reindex-concurrently.spec b/src/test/isolation/specs/reindex-concurrently.spec new file mode 100644 index 0000000000..eb59fe0cba --- /dev/null +++ b/src/test/isolation/specs/reindex-concurrently.spec @@ -0,0 +1,40 @@ +# REINDEX CONCURRENTLY +# +# Ensure that concurrent operations work correctly when a REINDEX is performed +# concurrently. + +setup +{ + CREATE TABLE reind_con_tab(id serial primary key, data text); + INSERT INTO reind_con_tab(data) VALUES ('aa'); + INSERT INTO reind_con_tab(data) VALUES ('aaa'); + INSERT INTO reind_con_tab(data) VALUES ('aaaa'); + INSERT INTO reind_con_tab(data) VALUES ('aaaaa'); +} + +teardown +{ + DROP TABLE reind_con_tab; +} + +session "s1" +setup { BEGIN; } +step "sel1" { SELECT data FROM reind_con_tab WHERE id = 3; } +step "end1" { COMMIT; } + +session "s2" +setup { BEGIN; } +step "upd2" { UPDATE reind_con_tab SET data = 'bbbb' WHERE id = 3; } +step "ins2" { INSERT INTO reind_con_tab(data) VALUES ('cccc'); } +step "del2" { DELETE FROM reind_con_tab WHERE data = 'cccc'; } +step "end2" { COMMIT; } + +session "s3" +step "reindex" { REINDEX TABLE CONCURRENTLY reind_con_tab; } + +permutation "reindex" "sel1" "upd2" "ins2" "del2" "end1" "end2" +permutation "sel1" "reindex" "upd2" "ins2" "del2" "end1" "end2" +permutation "sel1" "upd2" "reindex" "ins2" "del2" "end1" "end2" +permutation "sel1" "upd2" "ins2" "reindex" "del2" "end1" "end2" +permutation "sel1" "upd2" "ins2" "del2" "reindex" "end1" "end2" +permutation "sel1" "upd2" "ins2" "del2" "end1" 
"reindex" "end2" diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 46deb55c67..f10ff3c5c1 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -3292,3 +3292,98 @@ DROP ROLE regress_reindexuser; \set VERBOSITY terse \\ -- suppress cascade details DROP SCHEMA schema_to_reindex CASCADE; NOTICE: drop cascades to 6 other objects +RESET client_min_messages; +RESET search_path; +-- +-- Check behavior of REINDEX and REINDEX CONCURRENTLY +-- +CREATE TABLE concur_reindex_tab (c1 int); +-- REINDEX +REINDEX TABLE concur_reindex_tab; -- notice +NOTICE: table "concur_reindex_tab" has no indexes +REINDEX TABLE CONCURRENTLY concur_reindex_tab; -- notice +NOTICE: table "concur_reindex_tab" has no indexes +ALTER TABLE concur_reindex_tab ADD COLUMN c2 text; -- add toast index +-- Normal index with integer column +CREATE UNIQUE INDEX concur_reindex_ind1 ON concur_reindex_tab(c1); +-- Normal index with text column +CREATE INDEX concur_reindex_ind2 ON concur_reindex_tab(c2); +-- UNIQUE index with expression +CREATE UNIQUE INDEX concur_reindex_ind3 ON concur_reindex_tab(abs(c1)); +-- Duplicate column names +CREATE INDEX concur_reindex_ind4 ON concur_reindex_tab(c1, c1, c2); +-- Create table for check on foreign key dependence switch with indexes swapped +ALTER TABLE concur_reindex_tab ADD PRIMARY KEY USING INDEX concur_reindex_ind1; +CREATE TABLE concur_reindex_tab2 (c1 int REFERENCES concur_reindex_tab); +INSERT INTO concur_reindex_tab VALUES (1, 'a'); +INSERT INTO concur_reindex_tab VALUES (2, 'a'); +-- Reindex of exclusion constraint +ALTER TABLE concur_reindex_tab ADD COLUMN c3 int4range, ADD EXCLUDE USING gist (c3 WITH &&); +INSERT INTO concur_reindex_tab VALUES (3, 'a', '[1,2]'); +REINDEX TABLE concur_reindex_tab; +INSERT INTO concur_reindex_tab VALUES (4, 'a', '[2,4]'); +ERROR: conflicting key value violates exclusion constraint "concur_reindex_tab_c3_excl" +-- Check 
materialized views +CREATE MATERIALIZED VIEW concur_reindex_matview AS SELECT * FROM concur_reindex_tab; +REINDEX INDEX CONCURRENTLY concur_reindex_ind1; +REINDEX TABLE CONCURRENTLY concur_reindex_tab; +REINDEX TABLE CONCURRENTLY concur_reindex_matview; +-- Check that comments are preserved +CREATE TABLE testcomment (i int); +CREATE INDEX testcomment_idx1 ON testcomment (i); +COMMENT ON INDEX testcomment_idx1 IS 'test comment'; +SELECT obj_description('testcomment_idx1'::regclass, 'pg_class'); + obj_description +----------------- + test comment +(1 row) + +REINDEX TABLE testcomment; +SELECT obj_description('testcomment_idx1'::regclass, 'pg_class'); + obj_description +----------------- + test comment +(1 row) + +REINDEX TABLE CONCURRENTLY testcomment ; +SELECT obj_description('testcomment_idx1'::regclass, 'pg_class'); + obj_description +----------------- + test comment +(1 row) + +DROP TABLE testcomment; +-- Check errors +-- Cannot run inside a transaction block +BEGIN; +REINDEX TABLE CONCURRENTLY concur_reindex_tab; +ERROR: REINDEX CONCURRENTLY cannot run inside a transaction block +COMMIT; +REINDEX TABLE CONCURRENTLY pg_database; -- no shared relation +ERROR: concurrent reindex is not supported for shared relations +REINDEX TABLE CONCURRENTLY pg_class; -- no catalog relations +ERROR: concurrent reindex is not supported for catalog relations +REINDEX SYSTEM CONCURRENTLY postgres; -- not allowed for SYSTEM +ERROR: can only reindex the currently open database +-- Warns about catalog relations +REINDEX SCHEMA CONCURRENTLY pg_catalog; +WARNING: concurrent reindex is not supported for catalog relations, skipping all +-- Check the relation status, there should not be invalid indexes +\d concur_reindex_tab + Table "public.concur_reindex_tab" + Column | Type | Collation | Nullable | Default +--------+-----------+-----------+----------+--------- + c1 | integer | | not null | + c2 | text | | | + c3 | int4range | | | +Indexes: + "concur_reindex_ind1" PRIMARY KEY, btree (c1) + 
"concur_reindex_ind3" UNIQUE, btree (abs(c1)) + "concur_reindex_ind2" btree (c2) + "concur_reindex_ind4" btree (c1, c1, c2) + "concur_reindex_tab_c3_excl" EXCLUDE USING gist (c3 WITH &&) +Referenced by: + TABLE "concur_reindex_tab2" CONSTRAINT "concur_reindex_tab2_c1_fkey" FOREIGN KEY (c1) REFERENCES concur_reindex_tab(c1) + +DROP MATERIALIZED VIEW concur_reindex_matview; +DROP TABLE concur_reindex_tab, concur_reindex_tab2; diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index 59da6b6592..1669f6a0d8 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -1207,3 +1207,64 @@ CREATE ROLE regress_reindexuser NOLOGIN; DROP ROLE regress_reindexuser; \set VERBOSITY terse \\ -- suppress cascade details DROP SCHEMA schema_to_reindex CASCADE; +RESET client_min_messages; +RESET search_path; + +-- +-- Check behavior of REINDEX and REINDEX CONCURRENTLY +-- + +CREATE TABLE concur_reindex_tab (c1 int); +-- REINDEX +REINDEX TABLE concur_reindex_tab; -- notice +REINDEX TABLE CONCURRENTLY concur_reindex_tab; -- notice +ALTER TABLE concur_reindex_tab ADD COLUMN c2 text; -- add toast index +-- Normal index with integer column +CREATE UNIQUE INDEX concur_reindex_ind1 ON concur_reindex_tab(c1); +-- Normal index with text column +CREATE INDEX concur_reindex_ind2 ON concur_reindex_tab(c2); +-- UNIQUE index with expression +CREATE UNIQUE INDEX concur_reindex_ind3 ON concur_reindex_tab(abs(c1)); +-- Duplicate column names +CREATE INDEX concur_reindex_ind4 ON concur_reindex_tab(c1, c1, c2); +-- Create table for check on foreign key dependence switch with indexes swapped +ALTER TABLE concur_reindex_tab ADD PRIMARY KEY USING INDEX concur_reindex_ind1; +CREATE TABLE concur_reindex_tab2 (c1 int REFERENCES concur_reindex_tab); +INSERT INTO concur_reindex_tab VALUES (1, 'a'); +INSERT INTO concur_reindex_tab VALUES (2, 'a'); +-- Reindex of exclusion constraint +ALTER TABLE concur_reindex_tab ADD COLUMN c3 
int4range, ADD EXCLUDE USING gist (c3 WITH &&); +INSERT INTO concur_reindex_tab VALUES (3, 'a', '[1,2]'); +REINDEX TABLE concur_reindex_tab; +INSERT INTO concur_reindex_tab VALUES (4, 'a', '[2,4]'); +-- Check materialized views +CREATE MATERIALIZED VIEW concur_reindex_matview AS SELECT * FROM concur_reindex_tab; +REINDEX INDEX CONCURRENTLY concur_reindex_ind1; +REINDEX TABLE CONCURRENTLY concur_reindex_tab; +REINDEX TABLE CONCURRENTLY concur_reindex_matview; +-- Check that comments are preserved +CREATE TABLE testcomment (i int); +CREATE INDEX testcomment_idx1 ON testcomment (i); +COMMENT ON INDEX testcomment_idx1 IS 'test comment'; +SELECT obj_description('testcomment_idx1'::regclass, 'pg_class'); +REINDEX TABLE testcomment; +SELECT obj_description('testcomment_idx1'::regclass, 'pg_class'); +REINDEX TABLE CONCURRENTLY testcomment ; +SELECT obj_description('testcomment_idx1'::regclass, 'pg_class'); +DROP TABLE testcomment; + +-- Check errors +-- Cannot run inside a transaction block +BEGIN; +REINDEX TABLE CONCURRENTLY concur_reindex_tab; +COMMIT; +REINDEX TABLE CONCURRENTLY pg_database; -- no shared relation +REINDEX TABLE CONCURRENTLY pg_class; -- no catalog relations +REINDEX SYSTEM CONCURRENTLY postgres; -- not allowed for SYSTEM +-- Warns about catalog relations +REINDEX SCHEMA CONCURRENTLY pg_catalog; + +-- Check the relation status, there should not be invalid indexes +\d concur_reindex_tab +DROP MATERIALIZED VIEW concur_reindex_matview; +DROP TABLE concur_reindex_tab, concur_reindex_tab2; base-commit: e77cfa54d700557ea700d47454c9e570f20f1841 -- 2.20.1