From 94f37760932860699494fdea2fee6c18f24c3380 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Mon, 23 Oct 2017 10:18:38 +0200 Subject: [PATCH v12] Local partitioned indexes When CREATE INDEX is run on a partitioned table, create catalog entries for an index on the partitioned table (which is just a placeholder since the table proper has no data of its own), and recurse to create actual indexes on the existing partitions; create them in future partitions also. As a convenience gadget, if the new index definition matches some existing index in partitions, these are picked up and used instead of creating new ones. Whichever way these indexes come about, they become attached to the index on the parent table and are dropped alongside it, and cannot be dropped on isolation unless they are detached first. To support pg_dump'ing these indexes, add commands CREATE INDEX ON ONLY (which creates the index on the parent partitioned table, without recursing) and ALTER INDEX ATTACH PARTITION (which is used after the indexes have been created individually on each partition, to attach them to the parent index). These reconstruct prior database state exactly. Reviewed-by: Robert Haas, Amit Langote, Jesper Pedersen, Simon Riggs, David Rowley Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql --- doc/src/sgml/catalogs.sgml | 22 ++ doc/src/sgml/ref/alter_index.sgml | 14 + doc/src/sgml/ref/alter_table.sgml | 8 +- doc/src/sgml/ref/create_index.sgml | 33 +- doc/src/sgml/ref/reindex.sgml | 5 + src/backend/access/common/reloptions.c | 1 + src/backend/access/heap/heapam.c | 9 +- src/backend/access/index/indexam.c | 3 +- src/backend/bootstrap/bootparse.y | 2 + src/backend/catalog/aclchk.c | 9 +- src/backend/catalog/dependency.c | 12 +- src/backend/catalog/heap.c | 1 + src/backend/catalog/index.c | 136 +++++++- src/backend/catalog/objectaddress.c | 5 +- src/backend/catalog/pg_depend.c | 13 +- src/backend/catalog/pg_inherits.c | 31 ++ src/backend/catalog/toasting.c | 2 + src/backend/commands/indexcmds.c | 332 +++++++++++++++++- src/backend/commands/tablecmds.c | 527 +++++++++++++++++++++++++++-- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/equalfuncs.c | 1 + src/backend/nodes/outfuncs.c | 1 + src/backend/optimizer/util/plancat.c | 10 +- src/backend/parser/gram.y | 32 +- src/backend/parser/parse_utilcmd.c | 65 ++-- src/backend/tcop/utility.c | 9 + src/backend/utils/adt/amutils.c | 3 +- src/backend/utils/adt/ruleutils.c | 17 +- src/backend/utils/cache/relcache.c | 30 +- src/bin/pg_dump/common.c | 102 ++++++ src/bin/pg_dump/pg_dump.c | 90 ++++- src/bin/pg_dump/pg_dump.h | 11 + src/bin/pg_dump/pg_dump_sort.c | 56 +++- src/bin/psql/describe.c | 20 +- src/bin/psql/tab-complete.c | 34 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/dependency.h | 15 + src/include/catalog/index.h | 7 + src/include/catalog/pg_class.h | 1 + src/include/catalog/pg_inherits_fn.h | 2 + src/include/commands/defrem.h | 3 +- src/include/nodes/execnodes.h | 1 + src/include/nodes/parsenodes.h | 7 +- src/include/parser/parse_utilcmd.h | 3 + src/test/regress/expected/alter_table.out | 65 +++- src/test/regress/expected/indexing.out | 541 ++++++++++++++++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/serial_schedule | 1 + src/test/regress/sql/alter_table.sql | 16 + src/test/regress/sql/indexing.sql | 244 ++++++++++++++ 50 files changed, 2415 insertions(+), 142 deletions(-) create mode 100644 src/test/regress/expected/indexing.out create mode 100644 src/test/regress/sql/indexing.sql diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 3f02202caf..192891bc4f 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2996,6 +2996,28 @@ SCRAM-SHA-256$<iteration count>:&l + DEPENDENCY_INTERNAL_AUTO (I) + + + The dependent object was created as part of creation of the + referenced object, and is really just a part of its internal + implementation. A DROP of the dependent object + will be disallowed outright (we'll tell the user to issue a + DROP against the referenced object, instead). + While a regular internal dependency will prevent + the dependent object from being dropped while any such dependencies remain, + DEPENDENCY_INTERNAL_AUTO will allow such a drop as long + as the object can be found by following any of such dependencies. + Example: an index + on a partition is made internal-auto-dependent on both the partition + itself as well as on the index on the parent partitioned table; + so the partition index is dropped together with either partition it indexes, + or with the parent index it is attached to. + + + + + DEPENDENCY_EXTENSION (e) diff --git a/doc/src/sgml/ref/alter_index.sgml b/doc/src/sgml/ref/alter_index.sgml index e54237272c..3984686d67 100644 --- a/doc/src/sgml/ref/alter_index.sgml +++ b/doc/src/sgml/ref/alter_index.sgml @@ -23,6 +23,7 @@ PostgreSQL documentation ALTER INDEX [ IF EXISTS ] name RENAME TO new_name ALTER INDEX [ IF EXISTS ] name SET TABLESPACE tablespace_name +ALTER INDEX name ATTACH PARTITION index_name ALTER INDEX name DEPENDS ON EXTENSION extension_name ALTER INDEX [ IF EXISTS ] name SET ( storage_parameter = value [, ... ] ) ALTER INDEX [ IF EXISTS ] name RESET ( storage_parameter [, ... ] ) @@ -76,6 +77,19 @@ ALTER INDEX ALL IN TABLESPACE name + ATTACH + + + Causes the named index to become attached to the altered index. + The named index must be on a partition of the table containing the + index being altered, and have an equivalent definition. An attached + index cannot be dropped by itself, and will automatically be dropped + if its parent index is dropped. + + + + + DEPENDS ON EXTENSION diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 7bcf242846..0a2f3e3646 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -783,7 +783,10 @@ ALTER TABLE [ IF EXISTS ] name as a partition of the target table. The table can be attached as a partition for specific values using FOR VALUES or as a default partition by using DEFAULT - . + . For each index in the target table, a corresponding + one will be created in the attached table; or, if an equivalent + index already exists, will be attached to the target table's index, + as if ALTER INDEX ATTACH had been executed. @@ -844,7 +847,8 @@ ALTER TABLE [ IF EXISTS ] name This form detaches specified partition of the target table. The detached partition continues to exist as a standalone table, but no longer has any - ties to the table from which it was detached. + ties to the table from which it was detached. Any indexes that were + attached to the target table's indexes are detached. diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 025537575b..5137fe6383 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -21,7 +21,7 @@ PostgreSQL documentation -CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] name ] ON table_name [ USING method ] +CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] name ] ON [ ONLY ] table_name [ USING method ] ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] ) [ WITH ( storage_parameter = value [, ... ] ) ] [ TABLESPACE tablespace_name ] @@ -152,6 +152,16 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] + ONLY + + + Indicates not to recurse creating indexes on partitions, if the + table is partitioned. The default is to recurse. + + + + + table_name @@ -546,6 +556,27 @@ Indexes: + When CREATE INDEX is invoked on a partitioned + table, the default behavior is to recurse to all partitions to ensure + they all have matching indexes. + Each partition is first checked to determine whether an equivalent + index already exists, and if so, that index will become attached as a + partition index to the index being created, which will become its + parent index. + If no matching index exists, a new index will be created and + automatically attached; the name of the new index in each partition + will be determined as if no index name had been specified in the + command. + If the ONLY option is specified, no recursion + is done, and the index is marked invalid + (ALTER INDEX ... ATTACH PARTITION turns the index + valid, once all partitions acquire the index.) Note, however, that + any partition that is created in the future using + CREATE TABLE ... PARTITION OF will automatically + contain the index regardless of whether this option was specified. + + + For index methods that support ordered scans (currently, only B-tree), the optional clauses ASC, DESC, NULLS FIRST, and/or NULLS LAST can be specified to modify diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml index 79f6931c6a..1c21fafb80 100644 --- a/doc/src/sgml/ref/reindex.sgml +++ b/doc/src/sgml/ref/reindex.sgml @@ -231,6 +231,11 @@ REINDEX [ ( VERBOSE ) ] { INDEX | TABLE | SCHEMA | DATABASE | SYSTEM } + + Reindexing partitioned tables or partitioned indexes is not supported. + Each individual partition can be reindexed separately instead. + + diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 425bc5d06e..274f7aa8e9 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -993,6 +993,7 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc, options = view_reloptions(datum, false); break; case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: options = index_reloptions(amoptions, datum, false); break; case RELKIND_FOREIGN_TABLE: diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index dbc8f2d6c7..be263850cd 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1293,7 +1293,8 @@ heap_open(Oid relationId, LOCKMODE lockmode) r = relation_open(relationId, lockmode); - if (r->rd_rel->relkind == RELKIND_INDEX) + if (r->rd_rel->relkind == RELKIND_INDEX || + r->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is an index", @@ -1321,7 +1322,8 @@ heap_openrv(const RangeVar *relation, LOCKMODE lockmode) r = relation_openrv(relation, lockmode); - if (r->rd_rel->relkind == RELKIND_INDEX) + if (r->rd_rel->relkind == RELKIND_INDEX || + r->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is an index", @@ -1353,7 +1355,8 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode, if (r) { - if (r->rd_rel->relkind == RELKIND_INDEX) + if (r->rd_rel->relkind == RELKIND_INDEX || + r->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is an index", diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 1b61cd9515..91247f0fa5 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -154,7 +154,8 @@ index_open(Oid relationId, LOCKMODE lockmode) r = relation_open(relationId, lockmode); - if (r->rd_rel->relkind != RELKIND_INDEX) + if (r->rd_rel->relkind != RELKIND_INDEX && + r->rd_rel->relkind != RELKIND_PARTITIONED_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not an index", diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y index 8c52846a92..dfd53fa054 100644 --- a/src/backend/bootstrap/bootparse.y +++ b/src/backend/bootstrap/bootparse.y @@ -321,6 +321,7 @@ Boot_DeclareIndexStmt: DefineIndex(relationId, stmt, $4, + InvalidOid, false, false, false, @@ -365,6 +366,7 @@ Boot_DeclareUniqueIndexStmt: DefineIndex(relationId, stmt, $5, + InvalidOid, false, false, false, diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index fac80612b8..50a2e2681b 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -1824,7 +1824,8 @@ ExecGrant_Relation(InternalGrant *istmt) pg_class_tuple = (Form_pg_class) GETSTRUCT(tuple); /* Not sensible to grant on an index */ - if (pg_class_tuple->relkind == RELKIND_INDEX) + if (pg_class_tuple->relkind == RELKIND_INDEX || + pg_class_tuple->relkind == RELKIND_PARTITIONED_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is an index", @@ -5405,7 +5406,8 @@ recordExtObjInitPriv(Oid objoid, Oid classoid) pg_class_tuple = (Form_pg_class) GETSTRUCT(tuple); /* Indexes don't have permissions */ - if (pg_class_tuple->relkind == RELKIND_INDEX) + if (pg_class_tuple->relkind == RELKIND_INDEX || + pg_class_tuple->relkind == RELKIND_PARTITIONED_INDEX) return; /* Composite types don't have permissions either */ @@ -5690,7 +5692,8 @@ removeExtObjInitPriv(Oid objoid, Oid classoid) pg_class_tuple = (Form_pg_class) GETSTRUCT(tuple); /* Indexes don't have permissions */ - if (pg_class_tuple->relkind == RELKIND_INDEX) + if (pg_class_tuple->relkind == RELKIND_INDEX || + pg_class_tuple->relkind == RELKIND_PARTITIONED_INDEX) return; /* Composite types don't have permissions either */ diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 269111b4c1..a46e859488 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -582,6 +582,7 @@ findDependentObjects(const ObjectAddress *object, /* FALL THRU */ case DEPENDENCY_INTERNAL: + case DEPENDENCY_INTERNAL_AUTO: /* * This object is part of the internal implementation of @@ -633,11 +634,17 @@ findDependentObjects(const ObjectAddress *object, * transform this deletion request into a delete of this * owning object. * + * For INTERNAL_AUTO dependencies, we don't enforce this; + * in other words, we don't follow the links back to the + * owning object. + * * First, release caller's lock on this object and get * deletion lock on the owning object. (We must release * caller's lock to avoid deadlock against a concurrent * deletion of the owning object.) */ + if (foundDep->deptype == DEPENDENCY_INTERNAL_AUTO) + break; ReleaseDeletionLock(object); AcquireDeletionLock(&otherObject, 0); @@ -675,6 +682,7 @@ findDependentObjects(const ObjectAddress *object, /* And we're done here. */ systable_endscan(scan); return; + case DEPENDENCY_PIN: /* @@ -762,6 +770,7 @@ findDependentObjects(const ObjectAddress *object, case DEPENDENCY_AUTO_EXTENSION: subflags = DEPFLAG_AUTO; break; + case DEPENDENCY_INTERNAL_AUTO: case DEPENDENCY_INTERNAL: subflags = DEPFLAG_INTERNAL; break; @@ -1109,7 +1118,8 @@ doDeletion(const ObjectAddress *object, int flags) { char relKind = get_rel_relkind(object->objectId); - if (relKind == RELKIND_INDEX) + if (relKind == RELKIND_INDEX || + relKind == RELKIND_PARTITIONED_INDEX) { bool concurrent = ((flags & PERFORM_DELETION_CONCURRENTLY) != 0); diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 089b7965f2..99f4d59863 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -294,6 +294,7 @@ heap_create(const char *relname, case RELKIND_COMPOSITE_TYPE: case RELKIND_FOREIGN_TABLE: case RELKIND_PARTITIONED_TABLE: + case RELKIND_PARTITIONED_INDEX: create_storage = false; /* diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 330488b96f..6c4649c594 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -41,6 +41,7 @@ #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" #include "catalog/pg_constraint_fn.h" +#include "catalog/pg_depend.h" #include "catalog/pg_operator.h" #include "catalog/pg_opclass.h" #include "catalog/pg_tablespace.h" @@ -98,6 +99,7 @@ static void InitializeAttributeOids(Relation indexRelation, int numatts, Oid indexoid); static void AppendAttributeTuples(Relation indexRelation, int numatts); static void UpdateIndexRelation(Oid indexoid, Oid heapoid, + Oid parentIndexId, IndexInfo *indexInfo, Oid *collationOids, Oid *classOids, @@ -105,7 +107,8 @@ static void UpdateIndexRelation(Oid indexoid, Oid heapoid, bool primary, bool isexclusion, bool immediate, - bool isvalid); + bool isvalid, + bool isready); static void index_update_stats(Relation rel, bool hasindex, bool isprimary, double reltuples); @@ -551,6 +554,7 @@ AppendAttributeTuples(Relation indexRelation, int numatts) static void UpdateIndexRelation(Oid indexoid, Oid heapoid, + Oid parentIndexOid, IndexInfo *indexInfo, Oid *collationOids, Oid *classOids, @@ -558,7 +562,8 @@ UpdateIndexRelation(Oid indexoid, bool primary, bool isexclusion, bool immediate, - bool isvalid) + bool isvalid, + bool isready) { int2vector *indkey; oidvector *indcollation; @@ -632,8 +637,7 @@ UpdateIndexRelation(Oid indexoid, values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false); values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid); values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false); - /* we set isvalid and isready the same way */ - values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid); + values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready); values[Anum_pg_index_indislive - 1] = BoolGetDatum(true); values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false); values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey); @@ -670,6 +674,8 @@ UpdateIndexRelation(Oid indexoid, * indexRelationId: normally, pass InvalidOid to let this routine * generate an OID for the index. During bootstrap this may be * nonzero to specify a preselected OID. + * parentIndexRelid: if creating an index partition, the OID of the + * parent index; otherwise InvalidOid. * relFileNode: normally, pass InvalidOid to get new storage. May be * nonzero to attach an existing valid build. * indexInfo: same info executor uses to insert into the index @@ -695,6 +701,8 @@ UpdateIndexRelation(Oid indexoid, * INDEX_CREATE_IF_NOT_EXISTS: * do not throw an error if a relation with the same name * already exists. + * INDEX_CREATE_PARTITIONED: + * create a partitioned index (table must be partitioned) * constr_flags: flags passed to index_constraint_create * (only if INDEX_CREATE_ADD_CONSTRAINT is set) * allow_system_table_mods: allow table to be a system catalog @@ -706,6 +714,7 @@ Oid index_create(Relation heapRelation, const char *indexRelationName, Oid indexRelationId, + Oid parentIndexRelid, Oid relFileNode, IndexInfo *indexInfo, List *indexColNames, @@ -731,12 +740,18 @@ index_create(Relation heapRelation, int i; char relpersistence; bool isprimary = (flags & INDEX_CREATE_IS_PRIMARY) != 0; + bool invalid = (flags & INDEX_CREATE_INVALID) != 0; bool concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0; + bool partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0; + char relkind; /* constraint flags can only be set when a constraint is requested */ Assert((constr_flags == 0) || ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0)); + /* partitioned indexes must never be "built" by themselves */ + Assert(!partitioned || (flags & INDEX_CREATE_SKIP_BUILD)); + relkind = partitioned ? RELKIND_PARTITIONED_INDEX : RELKIND_INDEX; is_exclusion = (indexInfo->ii_ExclusionOps != NULL); pg_class = heap_open(RelationRelationId, RowExclusiveLock); @@ -864,7 +879,7 @@ index_create(Relation heapRelation, indexRelationId, relFileNode, indexTupDesc, - RELKIND_INDEX, + relkind, relpersistence, shared_relation, mapped_relation, @@ -921,12 +936,18 @@ index_create(Relation heapRelation, * (Or, could define a rule to maintain the predicate) --Nels, Feb '92 * ---------------- */ - UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo, + UpdateIndexRelation(indexRelationId, heapRelationId, parentIndexRelid, + indexInfo, collationObjectId, classObjectId, coloptions, isprimary, is_exclusion, (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) == 0, + !concurrent && !invalid, !concurrent); + /* update pg_inherits, if needed */ + if (OidIsValid(parentIndexRelid)) + StoreSingleInheritance(indexRelationId, parentIndexRelid, 1); + /* * Register constraint and dependencies for the index. * @@ -978,6 +999,9 @@ index_create(Relation heapRelation, else { bool have_simple_col = false; + DependencyType deptype; + + deptype = OidIsValid(parentIndexRelid) ? DEPENDENCY_INTERNAL_AUTO : DEPENDENCY_AUTO; /* Create auto dependencies on simply-referenced columns */ for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) @@ -988,7 +1012,7 @@ index_create(Relation heapRelation, referenced.objectId = heapRelationId; referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i]; - recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + recordDependencyOn(&myself, &referenced, deptype); have_simple_col = true; } @@ -1006,10 +1030,21 @@ index_create(Relation heapRelation, referenced.objectId = heapRelationId; referenced.objectSubId = 0; - recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + recordDependencyOn(&myself, &referenced, deptype); } } + /* Store dependency on parent index, if any */ + if (OidIsValid(parentIndexRelid)) + { + referenced.classId = RelationRelationId; + referenced.objectId = parentIndexRelid; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL_AUTO); + } + + /* Store dependency on collations */ /* The default collation is pinned, so don't bother recording it */ for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) @@ -1555,9 +1590,10 @@ index_drop(Oid indexId, bool concurrent) } /* - * Schedule physical removal of the files + * Schedule physical removal of the files (if any) */ - RelationDropStorage(userIndexRelation); + if (userIndexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX) + RelationDropStorage(userIndexRelation); /* * Close and flush the index's relcache entry, to ensure relcache doesn't @@ -1694,12 +1730,65 @@ BuildIndexInfo(Relation index) ii->ii_BrokenHotChain = false; /* set up for possible use by index AM */ + ii->ii_Am = index->rd_rel->relam; ii->ii_AmCache = NULL; ii->ii_Context = CurrentMemoryContext; return ii; } +/* + * CompareIndexInfo + * Compare two IndexInfos, and return true if they are similar enough that + * an index built with one can pass as an index built with the other. + * + * attmap is an attribute map where info2 is input and info1 is output. + */ +bool +CompareIndexInfo(IndexInfo *info1, IndexInfo *info2, AttrNumber *attmap) +{ + int i; + + /* indexes are only equivalent if they have the same access method */ + if (info1->ii_Am != info2->ii_Am) + return false; + + /* and same number of attributes */ + if (info1->ii_NumIndexAttrs != info2->ii_NumIndexAttrs) + return false; + + /* + * and columns match through the attribute map (actual attribute numbers + * might differ!) + */ + for (i = 0; i < info1->ii_NumIndexAttrs; i++) + { + if (attmap[info1->ii_KeyAttrNumbers[i] - 1] != + info2->ii_KeyAttrNumbers[i]) + return false; + } + + /* + * Expression indexes are currently not considered equal. Not needed for + * current callers. + */ + if (info1->ii_Expressions != NIL || info2->ii_Expressions != NIL) + return false; + + /* Index predicates must be identical */ + if (!equal(info1->ii_Predicate, info2->ii_Predicate)) + return false; + + /* No support currently for comparing exclusion indexes. */ + if (info1->ii_ExclusionOps != NULL || info2->ii_ExclusionOps != NULL) + return false; + + if (info1->ii_Unique != info2->ii_Unique) + return false; + + return true; +} + /* ---------------- * BuildSpeculativeIndexInfo * Add extra state to IndexInfo record @@ -1922,6 +2011,9 @@ index_update_stats(Relation rel, elog(ERROR, "could not find tuple for relation %u", relid); rd_rel = (Form_pg_class) GETSTRUCT(tuple); + /* Should this be a more comprehensive test? */ + Assert(rd_rel->relkind != RELKIND_PARTITIONED_INDEX); + /* Apply required updates, if any, to copied tuple */ dirty = false; @@ -3332,6 +3424,14 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, iRel = index_open(indexId, AccessExclusiveLock); /* + * The case of reindexing partitioned tables and indexes is handled + * differently by upper layers, so this case shouldn't arise. + */ + if (iRel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + elog(ERROR, "unsupported relation kind for index \"%s\"", + RelationGetRelationName(iRel)); + + /* * Don't allow reindex on temp tables of other backends ... their local * buffer manager is not going to cope. */ @@ -3530,6 +3630,22 @@ reindex_relation(Oid relid, int flags, int options) */ rel = heap_open(relid, ShareLock); + /* + * This may be useful when implemented someday; but that day is not today. + * For now, avoid erroring out when called in a multi-table context + * (REINDEX SCHEMA) and happen to come across a partitioned table. The + * partitions may be reindexed on their own anyway. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"", + RelationGetRelationName(rel)))); + heap_close(rel, ShareLock); + return false; + } + toast_relid = rel->rd_rel->reltoastrelid; /* diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index bc999ca3c4..7576606c1b 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -1217,7 +1217,8 @@ get_relation_by_qualified_name(ObjectType objtype, List *object, switch (objtype) { case OBJECT_INDEX: - if (relation->rd_rel->relkind != RELKIND_INDEX) + if (relation->rd_rel->relkind != RELKIND_INDEX && + relation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not an index", @@ -3483,6 +3484,7 @@ getRelationDescription(StringInfo buffer, Oid relid) relname); break; case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: appendStringInfo(buffer, _("index %s"), relname); break; @@ -3957,6 +3959,7 @@ getRelationTypeDescription(StringInfo buffer, Oid relid, int32 objectSubId) appendStringInfoString(buffer, "table"); break; case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: appendStringInfoString(buffer, "index"); break; case RELKIND_SEQUENCE: diff --git a/src/backend/catalog/pg_depend.c b/src/backend/catalog/pg_depend.c index 9dfbe123b5..2ea05f350b 100644 --- a/src/backend/catalog/pg_depend.c +++ b/src/backend/catalog/pg_depend.c @@ -656,14 +656,19 @@ get_constraint_index(Oid constraintId) /* * We assume any internal dependency of an index on the constraint - * must be what we are looking for. (The relkind test is just - * paranoia; there shouldn't be any such dependencies otherwise.) + * must be what we are looking for. */ if (deprec->classid == RelationRelationId && deprec->objsubid == 0 && - deprec->deptype == DEPENDENCY_INTERNAL && - get_rel_relkind(deprec->objid) == RELKIND_INDEX) + deprec->deptype == DEPENDENCY_INTERNAL) { + char relkind = get_rel_relkind(deprec->objid); + + /* This is pure paranoia; there shouldn't be any such */ + if (relkind != RELKIND_INDEX && + relkind != RELKIND_PARTITIONED_INDEX) + break; + indexId = deprec->objid; break; } diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index b32d677347..bde5176cab 100644 --- a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -405,3 +405,34 @@ typeInheritsFrom(Oid subclassTypeId, Oid superclassTypeId) return result; } + +/* + * Create a single pg_inherits row with the given data + */ +void +StoreSingleInheritance(Oid relationId, Oid parentOid, int16 seqNumber) +{ + Datum values[Natts_pg_inherits]; + bool nulls[Natts_pg_inherits]; + HeapTuple tuple; + Relation inhRelation; + + inhRelation = heap_open(InheritsRelationId, RowExclusiveLock); + + /* + * Make the pg_inherits entry + */ + values[Anum_pg_inherits_inhrelid - 1] = ObjectIdGetDatum(relationId); + values[Anum_pg_inherits_inhparent - 1] = ObjectIdGetDatum(parentOid); + values[Anum_pg_inherits_inhseqno - 1] = Int16GetDatum(seqNumber); + + memset(nulls, 0, sizeof(nulls)); + + tuple = heap_form_tuple(RelationGetDescr(inhRelation), values, nulls); + + CatalogTupleInsert(inhRelation, tuple); + + heap_freetuple(tuple); + + heap_close(inhRelation, RowExclusiveLock); +} diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 0b4b5631a1..cf37011b73 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -315,6 +315,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, indexInfo->ii_ReadyForInserts = true; indexInfo->ii_Concurrent = false; indexInfo->ii_BrokenHotChain = false; + indexInfo->ii_Am = BTREE_AM_OID; indexInfo->ii_AmCache = NULL; indexInfo->ii_Context = CurrentMemoryContext; @@ -328,6 +329,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, coloptions[1] = 0; index_create(toast_rel, toast_idxname, toastIndexOid, InvalidOid, + InvalidOid, indexInfo, list_make2("chunk_id", "chunk_seq"), BTREE_AM_OID, diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 9e6ba92008..9b7f29c7fc 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -23,7 +23,10 @@ #include "catalog/catalog.h" #include "catalog/index.h" #include "catalog/indexing.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_inherits_fn.h" #include "catalog/pg_opclass.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_tablespace.h" @@ -35,6 +38,7 @@ #include "commands/tablespace.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/planner.h" @@ -77,6 +81,7 @@ static char *ChooseIndexNameAddition(List *colnames); static List *ChooseIndexColumnNames(List *indexElems); static void RangeVarCallbackForReindexIndex(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg); +static void ReindexPartitionedIndex(Relation parentIdx); /* * CheckIndexCompatible @@ -183,6 +188,7 @@ CheckIndexCompatible(Oid oldId, indexInfo->ii_ExclusionOps = NULL; indexInfo->ii_ExclusionProcs = NULL; indexInfo->ii_ExclusionStrats = NULL; + indexInfo->ii_Am = accessMethodId; indexInfo->ii_AmCache = NULL; indexInfo->ii_Context = CurrentMemoryContext; typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); @@ -292,14 +298,15 @@ CheckIndexCompatible(Oid oldId, * 'stmt': IndexStmt describing the properties of the new index. * 'indexRelationId': normally InvalidOid, but during bootstrap can be * nonzero to specify a preselected OID for the index. + * 'parentIndexId': the OID of the parent index; InvalidOid if not the child + * of a partitioned index. * 'is_alter_table': this is due to an ALTER rather than a CREATE operation. * 'check_rights': check for CREATE rights in namespace and tablespace. (This * should be true except when ALTER is deleting/recreating an index.) * 'check_not_in_use': check for table not already in use in current session. * This should be true unless caller is holding the table open, in which * case the caller had better have checked it earlier. - * 'skip_build': make the catalog entries but leave the index file empty; - * it will be filled later. + * 'skip_build': make the catalog entries but don't create the index files * 'quiet': suppress the NOTICE chatter ordinarily provided for constraints. * * Returns the object address of the created index. @@ -308,6 +315,7 @@ ObjectAddress DefineIndex(Oid relationId, IndexStmt *stmt, Oid indexRelationId, + Oid parentIndexId, bool is_alter_table, bool check_rights, bool check_not_in_use, @@ -330,6 +338,7 @@ DefineIndex(Oid relationId, IndexAmRoutine *amRoutine; bool amcanorder; amoptions_function amoptions; + bool partitioned; Datum reloptions; int16 *coloptions; IndexInfo *indexInfo; @@ -382,23 +391,56 @@ DefineIndex(Oid relationId, { case RELKIND_RELATION: case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: /* OK */ break; case RELKIND_FOREIGN_TABLE: + /* + * Custom error message for FOREIGN TABLE since the term is close + * to a regular table and can confuse the user. + */ ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot create index on foreign table \"%s\"", RelationGetRelationName(rel)))); - case RELKIND_PARTITIONED_TABLE: - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("cannot create index on partitioned table \"%s\"", - RelationGetRelationName(rel)))); default: ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table or materialized view", RelationGetRelationName(rel)))); + break; + } + + /* + * Establish behavior for partitioned tables, and verify sanity of + * parameters. + * + * We do not build an actual index in this case; we only create a few + * catalog entries. The actual indexes are built by recursing for each + * partition. + */ + partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE; + if (partitioned) + { + if (stmt->concurrent) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create index on partitioned table \"%s\" concurrently", + RelationGetRelationName(rel)))); + if (stmt->unique) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create unique index on partitioned table \"%s\"", + RelationGetRelationName(rel)))); + if (stmt->excludeOpNames) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create exclusion constraints on partitioned table \"%s\"", + RelationGetRelationName(rel)))); + if (stmt->primary || stmt->isconstraint) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create constraints on partitioned tables"))); } /* @@ -574,6 +616,7 @@ DefineIndex(Oid relationId, indexInfo->ii_ReadyForInserts = !stmt->concurrent; indexInfo->ii_Concurrent = stmt->concurrent; indexInfo->ii_BrokenHotChain = false; + indexInfo->ii_Am = accessMethodId; indexInfo->ii_AmCache = NULL; indexInfo->ii_Context = CurrentMemoryContext; @@ -665,19 +708,24 @@ DefineIndex(Oid relationId, /* * Make the catalog entries for the index, including constraints. This * step also actually builds the index, except if caller requested not to - * or in concurrent mode, in which case it'll be done later. + * or in concurrent mode, in which case it'll be done later, or + * doing a partitioned index (because those don't have storage). */ flags = constr_flags = 0; if (stmt->isconstraint) flags |= INDEX_CREATE_ADD_CONSTRAINT; - if (skip_build || stmt->concurrent) + if (skip_build || stmt->concurrent || partitioned) flags |= INDEX_CREATE_SKIP_BUILD; if (stmt->if_not_exists) flags |= INDEX_CREATE_IF_NOT_EXISTS; if (stmt->concurrent) flags |= INDEX_CREATE_CONCURRENT; + if (partitioned) + flags |= INDEX_CREATE_PARTITIONED; if (stmt->primary) flags |= INDEX_CREATE_IS_PRIMARY; + if (partitioned && stmt->relation && !stmt->relation->inh) + flags |= INDEX_CREATE_INVALID; if (stmt->deferrable) constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE; @@ -685,8 +733,8 @@ DefineIndex(Oid relationId, constr_flags |= INDEX_CONSTR_CREATE_INIT_DEFERRED; indexRelationId = - index_create(rel, indexRelationName, indexRelationId, stmt->oldNode, - indexInfo, indexColNames, + index_create(rel, indexRelationName, indexRelationId, parentIndexId, + stmt->oldNode, indexInfo, indexColNames, accessMethodId, tablespaceId, collationObjectId, classObjectId, coloptions, reloptions, @@ -706,6 +754,115 @@ DefineIndex(Oid relationId, CreateComments(indexRelationId, RelationRelationId, 0, stmt->idxcomment); + if (partitioned) + { + /* + * Unless caller specified to skip this step (via ONLY), process + * each partition to make sure they all contain a corresponding index. + * + * If we're called internally (no stmt->relation), recurse always. + */ + if (!stmt->relation || stmt->relation->inh) + { + PartitionDesc partdesc = RelationGetPartitionDesc(rel); + int nparts = partdesc->nparts; + Oid *part_oids; + TupleDesc parentDesc; + + nparts = partdesc->nparts; + part_oids = palloc(sizeof(Oid) * nparts); + memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts); + + parentDesc = CreateTupleDescCopy(RelationGetDescr(rel)); + + heap_close(rel, NoLock); + + /* + * For each partition, scan all existing indexes; if one matches + * our index definition and is not already attached to some other + * parent index, attach it to the one we just created. + * + * If none matches, build a new index by calling ourselves + * recursively with the same options (except for the index name). + */ + for (i = 0; i < nparts; i++) + { + Oid childRelid = part_oids[i]; + Relation childrel; + List *childidxs; + ListCell *cell; + AttrNumber *attmap = NULL; + bool found = false; + + childrel = heap_open(childRelid, lockmode); + childidxs = RelationGetIndexList(childrel); + + foreach(cell, childidxs) + { + Oid cldidxid = lfirst_oid(cell); + Relation cldidx; + IndexInfo *cldIdxInfo; + + /* this index is already partition of another one */ + if (has_superclass(cldidxid)) + continue; + + cldidx = index_open(cldidxid, lockmode); + + cldIdxInfo = BuildIndexInfo(cldidx); + if (attmap == NULL) + attmap = + convert_tuples_by_name_map(RelationGetDescr(childrel), + parentDesc, + gettext_noop("could not convert row type")); + + if (CompareIndexInfo(cldIdxInfo, indexInfo, attmap)) + { + /* + * Found a match. Attach index to parent and we're + * done, but keep lock till commit. + */ + IndexSetParentIndex(cldidx, indexRelationId); + found = true; + index_close(cldidx, NoLock); + break; + } + + index_close(cldidx, lockmode); + } + + list_free(childidxs); + heap_close(childrel, NoLock); + if (attmap) + pfree(attmap); + + /* + * If no matching index was found, create our own. + */ + if (!found) + { + IndexStmt *childStmt = copyObject(stmt); + + childStmt->idxname = NULL; + childStmt->relationId = childRelid; + DefineIndex(childRelid, childStmt, + InvalidOid, /* no predefined OID */ + indexRelationId, /* this is our child */ + false, check_rights, check_not_in_use, + false, quiet); + } + } + } + else + heap_close(rel, NoLock); + + /* + * Indexes on partitioned tables are not themselves built, so we're + * done here. + */ + return address; + } + if (!stmt->concurrent) { /* Close the heap and we're done, in the non-concurrent case */ @@ -1765,7 +1922,7 @@ ChooseIndexColumnNames(List *indexElems) * ReindexIndex * Recreate a specific index. */ -Oid +void ReindexIndex(RangeVar *indexRelation, int options) { Oid indOid; @@ -1788,12 +1945,17 @@ ReindexIndex(RangeVar *indexRelation, int options) * lock on the index. */ irel = index_open(indOid, NoLock); + + if (irel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + { + ReindexPartitionedIndex(irel); + return; + } + persistence = irel->rd_rel->relpersistence; index_close(irel, NoLock); reindex_index(indOid, false, persistence, options); - - return indOid; } /* @@ -1832,7 +1994,8 @@ RangeVarCallbackForReindexIndex(const RangeVar *relation, relkind = get_rel_relkind(relId); if (!relkind) return; - if (relkind != RELKIND_INDEX) + if (relkind != RELKIND_INDEX && + relkind != RELKIND_PARTITIONED_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not an index", relation->relname))); @@ -1976,6 +2139,12 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, /* * Only regular tables and matviews can have indexes, so ignore any * other kind of relation. + * + * It is tempting to also consider partitioned tables here, but that + * has the problem that if the children are in the same schema, they + * would be processed twice. Maybe we could have a separate list of + * partitioned tables, and expand that afterwards into relids, + * ignoring any duplicates. */ if (classtuple->relkind != RELKIND_RELATION && classtuple->relkind != RELKIND_MATVIEW) @@ -2038,3 +2207,136 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, MemoryContextDelete(private_context); } + +/* + * Reindex each child of a partitioned index. + * + * The parent index is given, locked in AccessExclusive mode; this routine + * obtains the list of children and releases the lock on parent before + * applying reindex on each child. + */ +static void +ReindexPartitionedIndex(Relation parentIdx) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("REINDEX is not yet implemented for partitioned indexes"))); +} + +/* + * Insert or delete an appropriate pg_inherits tuple to make the given index + * be a partition of the indicated parent index. + * + * This also corrects the pg_depend information for the affected index. + */ +void +IndexSetParentIndex(Relation partitionIdx, Oid parentOid) +{ + ObjectAddress partIdx; + Relation pg_inherits; + ScanKeyData key[2]; + SysScanDesc scan; + Oid partRelid = RelationGetRelid(partitionIdx); + HeapTuple tuple; + + /* Make sure this is an index */ + Assert(partitionIdx->rd_rel->relkind == RELKIND_INDEX || + partitionIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX); + + /* + * Scan pg_inherits and determine whether to add or delete a tuple. + */ + pg_inherits = relation_open(InheritsRelationId, RowExclusiveLock); + ScanKeyInit(&key[0], + Anum_pg_inherits_inhrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(partRelid)); + ScanKeyInit(&key[1], + Anum_pg_inherits_inhseqno, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(1)); + scan = systable_beginscan(pg_inherits, InheritsRelidSeqnoIndexId, true, + NULL, 2, key); + tuple = systable_getnext(scan); + + if (!HeapTupleIsValid(tuple)) + { + if (parentOid == InvalidOid) + { + /* already in the right state */ + systable_endscan(scan); + heap_close(pg_inherits, RowExclusiveLock); + return; + } + else + { + Datum values[Natts_pg_inherits]; + bool isnull[Natts_pg_inherits]; + + values[Anum_pg_inherits_inhrelid - 1] = ObjectIdGetDatum(partRelid); + values[Anum_pg_inherits_inhparent - 1] = + ObjectIdGetDatum(parentOid); + values[Anum_pg_inherits_inhseqno - 1] = Int32GetDatum(1); + memset(isnull, false, sizeof(isnull)); + + tuple = heap_form_tuple(RelationGetDescr(pg_inherits), + values, isnull); + CatalogTupleInsert(pg_inherits, tuple); + systable_endscan(scan); + /* fall through to fix pg_depend */ + } + } + else + { + Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(tuple); + + if (parentOid == InvalidOid) + { + CatalogTupleDelete(pg_inherits, &tuple->t_self); + systable_endscan(scan); + /* fall through to fix pg_depend */ + } + else + { + if (inhForm->inhparent != parentOid) + { + /* unexpected: we should not get called in this case */ + elog(ERROR, "pg_inherits contains bogus data"); + } + + /* already in the right state */ + systable_endscan(scan); + heap_close(pg_inherits, RowExclusiveLock); + return; + } + } + + relation_close(pg_inherits, RowExclusiveLock); + + /* + * Insert/delete pg_depend rows. If setting a parent, add one; if making + * standalone, remove all existing rows. + */ + ObjectAddressSet(partIdx, RelationRelationId, partRelid); + + if (OidIsValid(parentOid)) + { + ObjectAddress parent; + + ObjectAddressSet(parent, RelationRelationId, parentOid); + recordDependencyOn(&partIdx, &parent, DEPENDENCY_INTERNAL_AUTO); + } + else + { + ObjectAddress partitionTbl; + + ObjectAddressSet(partitionTbl, + RelationRelationId, partitionIdx->rd_index->indrelid); + + deleteDependencyRecordsForClass(RelationRelationId, partRelid, + RelationRelationId, + DEPENDENCY_INTERNAL_AUTO); + + recordDependencyOn(&partIdx, &partitionTbl, DEPENDENCY_AUTO); + } +} diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index f2a928b823..a9a1d7fdd7 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -266,6 +266,12 @@ static const struct dropmsgstrings dropmsgstringarray[] = { gettext_noop("table \"%s\" does not exist, skipping"), gettext_noop("\"%s\" is not a table"), gettext_noop("Use DROP TABLE to remove a table.")}, + {RELKIND_PARTITIONED_INDEX, + ERRCODE_UNDEFINED_OBJECT, + gettext_noop("index \"%s\" does not exist"), + gettext_noop("index \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not an index"), + gettext_noop("Use DROP INDEX to remove an index.")}, {'\0', 0, NULL, NULL, NULL, NULL} }; @@ -480,6 +486,11 @@ static void ValidatePartitionConstraints(List **wqueue, Relation scanrel, List *partConstraint, bool validate_default); static ObjectAddress ATExecDetachPartition(Relation rel, RangeVar *name); +static ObjectAddress ATExecAttachPartitionIdx(List **wqueue, Relation rel, + RangeVar *name); +static void validatePartitionedIndex(Relation partedIdx, Relation partedTbl); +static void refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, + Relation partitionTbl); /* ---------------------------------------------------------------- @@ -897,6 +908,53 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs, partopclass, partcollation); + + /* make it all visible */ + CommandCounterIncrement(); + } + + /* + * If we're creating a partition, create now all the indexes defined in + * the parent. We can't do it earlier, because DefineIndex wants to know + * the partition key which we just stored. + */ + if (stmt->partbound) + { + Oid parentId = linitial_oid(inheritOids); + Relation parent; + List *idxlist; + ListCell *cell; + + /* Already have strong enough lock on the parent */ + parent = heap_open(parentId, NoLock); + idxlist = RelationGetIndexList(parent); + + /* + * For each index in the parent table, create one in the partition + */ + foreach(cell, idxlist) + { + Relation idxRel = index_open(lfirst_oid(cell), AccessShareLock); + AttrNumber *attmap; + IndexStmt *idxstmt; + + attmap = convert_tuples_by_name_map(RelationGetDescr(rel), + RelationGetDescr(parent), + gettext_noop("could not convert row type")); + idxstmt = + generateClonedIndexStmt(NULL, RelationGetRelid(rel), idxRel, + attmap, RelationGetDescr(rel)->natts); + DefineIndex(RelationGetRelid(rel), + idxstmt, + InvalidOid, + RelationGetRelid(idxRel), + false, false, false, false, false); + + index_close(idxRel, AccessShareLock); + } + + list_free(idxlist); + heap_close(parent, NoLock); } /* @@ -1179,10 +1237,13 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, * but RemoveRelations() can only pass one relkind for a given relation. * It chooses RELKIND_RELATION for both regular and partitioned tables. * That means we must be careful before giving the wrong type error when - * the relation is RELKIND_PARTITIONED_TABLE. + * the relation is RELKIND_PARTITIONED_TABLE. An equivalent problem + * exists with indexes. */ if (classform->relkind == RELKIND_PARTITIONED_TABLE) expected_relkind = RELKIND_RELATION; + else if (classform->relkind == RELKIND_PARTITIONED_INDEX) + expected_relkind = RELKIND_INDEX; else expected_relkind = classform->relkind; @@ -1210,7 +1271,8 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, * we do it the other way around. No error if we don't find a pg_index * entry, though --- the relation may have been dropped. */ - if (relkind == RELKIND_INDEX && relOid != oldRelOid) + if ((relkind == RELKIND_INDEX || relkind == RELKIND_PARTITIONED_INDEX) && + relOid != oldRelOid) { state->heapOid = IndexGetRelation(relOid, true); if (OidIsValid(state->heapOid)) @@ -2396,27 +2458,11 @@ StoreCatalogInheritance1(Oid relationId, Oid parentOid, int16 seqNumber, Relation inhRelation, bool child_is_partition) { - TupleDesc desc = RelationGetDescr(inhRelation); - Datum values[Natts_pg_inherits]; - bool nulls[Natts_pg_inherits]; ObjectAddress childobject, parentobject; - HeapTuple tuple; - /* - * Make the pg_inherits entry - */ - values[Anum_pg_inherits_inhrelid - 1] = ObjectIdGetDatum(relationId); - values[Anum_pg_inherits_inhparent - 1] = ObjectIdGetDatum(parentOid); - values[Anum_pg_inherits_inhseqno - 1] = Int16GetDatum(seqNumber); - - memset(nulls, 0, sizeof(nulls)); - - tuple = heap_form_tuple(desc, values, nulls); - - CatalogTupleInsert(inhRelation, tuple); - - heap_freetuple(tuple); + /* store the pg_inherits row */ + StoreSingleInheritance(relationId, parentOid, seqNumber); /* * Store a dependency too @@ -2540,6 +2586,7 @@ renameatt_check(Oid myrelid, Form_pg_class classform, bool recursing) relkind != RELKIND_MATVIEW && relkind != RELKIND_COMPOSITE_TYPE && relkind != RELKIND_INDEX && + relkind != RELKIND_PARTITIONED_INDEX && relkind != RELKIND_FOREIGN_TABLE && relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, @@ -3019,7 +3066,8 @@ RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal) /* * Also rename the associated constraint, if any. */ - if (targetrelation->rd_rel->relkind == RELKIND_INDEX) + if (targetrelation->rd_rel->relkind == RELKIND_INDEX || + targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) { Oid constraintId = get_index_constraint(myrelid); @@ -3073,6 +3121,7 @@ CheckTableNotInUse(Relation rel, const char *stmt) stmt, RelationGetRelationName(rel)))); if (rel->rd_rel->relkind != RELKIND_INDEX && + rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX && AfterTriggerPendingOnRel(RelationGetRelid(rel))) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), @@ -3764,6 +3813,10 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, pass = AT_PASS_MISC; break; case AT_AttachPartition: + ATSimplePermissions(rel, ATT_TABLE | ATT_INDEX); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; case AT_DetachPartition: ATSimplePermissions(rel, ATT_TABLE); /* No command-specific prep needed */ @@ -4112,9 +4165,14 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel, ATExecGenericOptions(rel, (List *) cmd->def); break; case AT_AttachPartition: - ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def); + else + ATExecAttachPartitionIdx(wqueue, rel, + ((PartitionCmd *) cmd->def)->name); break; case AT_DetachPartition: + Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); ATExecDetachPartition(rel, ((PartitionCmd *) cmd->def)->name); break; default: /* oops */ @@ -4750,6 +4808,7 @@ ATSimplePermissions(Relation rel, int allowed_targets) actual_target = ATT_MATVIEW; break; case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: actual_target = ATT_INDEX; break; case RELKIND_COMPOSITE_TYPE: @@ -6194,6 +6253,7 @@ ATPrepSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newVa if (rel->rd_rel->relkind != RELKIND_RELATION && rel->rd_rel->relkind != RELKIND_MATVIEW && rel->rd_rel->relkind != RELKIND_INDEX && + rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX && rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, @@ -6205,7 +6265,9 @@ ATPrepSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newVa * We allow referencing columns by numbers only for indexes, since table * column numbers could contain gaps if columns are later dropped. */ - if (rel->rd_rel->relkind != RELKIND_INDEX && !colName) + if (rel->rd_rel->relkind != RELKIND_INDEX && + rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX && + !colName) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot refer to non-index column by number"))); @@ -6283,7 +6345,8 @@ ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newVa errmsg("cannot alter system column \"%s\"", colName))); - if (rel->rd_rel->relkind == RELKIND_INDEX && + if ((rel->rd_rel->relkind == RELKIND_INDEX || + rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) && rel->rd_index->indkey.values[attnum - 1] != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -6736,6 +6799,7 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel, address = DefineIndex(RelationGetRelid(rel), stmt, InvalidOid, /* no predefined OID */ + InvalidOid, /* no parent index */ true, /* is_alter_table */ check_rights, false, /* check_not_in_use - we did it already */ @@ -9139,7 +9203,8 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel, { char relKind = get_rel_relkind(foundObject.objectId); - if (relKind == RELKIND_INDEX) + if (relKind == RELKIND_INDEX || + relKind == RELKIND_PARTITIONED_INDEX) { Assert(foundObject.objectSubId == 0); if (!list_member_oid(tab->changedIndexOids, foundObject.objectId)) @@ -9982,6 +10047,15 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock newOwnerId = tuple_class->relowner; } break; + case RELKIND_PARTITIONED_INDEX: + if (recursing) + break; + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot change owner of index \"%s\"", + NameStr(tuple_class->relname)), + errhint("Change the ownership of the index's table, instead."))); + break; case RELKIND_SEQUENCE: if (!recursing && tuple_class->relowner != newOwnerId) @@ -10103,6 +10177,7 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock */ if (tuple_class->relkind != RELKIND_COMPOSITE_TYPE && tuple_class->relkind != RELKIND_INDEX && + tuple_class->relkind != RELKIND_PARTITIONED_INDEX && tuple_class->relkind != RELKIND_TOASTVALUE) changeDependencyOnOwner(RelationRelationId, relationOid, newOwnerId); @@ -10110,7 +10185,8 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock /* * Also change the ownership of the table's row type, if it has one */ - if (tuple_class->relkind != RELKIND_INDEX) + if (tuple_class->relkind != RELKIND_INDEX && + tuple_class->relkind != RELKIND_PARTITIONED_INDEX) AlterTypeOwnerInternal(tuple_class->reltype, newOwnerId); /* @@ -10119,6 +10195,7 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock * relation, as well as its toast table (if it has one). */ if (tuple_class->relkind == RELKIND_RELATION || + tuple_class->relkind == RELKIND_PARTITIONED_TABLE || tuple_class->relkind == RELKIND_MATVIEW || tuple_class->relkind == RELKIND_TOASTVALUE) { @@ -10427,6 +10504,7 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, (void) view_reloptions(newOptions, true); break; case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: (void) index_reloptions(rel->rd_amroutine->amoptions, newOptions, true); break; default: @@ -10839,7 +10917,8 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) relForm->relkind != RELKIND_RELATION && relForm->relkind != RELKIND_PARTITIONED_TABLE) || (stmt->objtype == OBJECT_INDEX && - relForm->relkind != RELKIND_INDEX) || + relForm->relkind != RELKIND_INDEX && + relForm->relkind != RELKIND_PARTITIONED_INDEX) || (stmt->objtype == OBJECT_MATVIEW && relForm->relkind != RELKIND_MATVIEW)) continue; @@ -13226,7 +13305,8 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a composite type", rv->relname))); - if (reltype == OBJECT_INDEX && relkind != RELKIND_INDEX + if (reltype == OBJECT_INDEX && relkind != RELKIND_INDEX && + relkind != RELKIND_PARTITIONED_INDEX && !IsA(stmt, RenameStmt)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -13947,6 +14027,103 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd) StorePartitionBound(attachrel, rel, cmd->bound); /* + * Ensure a correct set of indexes in the partition. This either creates + * a new index in the table being attached, or re-parents an existing one. + */ + { + AttrNumber *attmap = NULL; + List *idxes; + List *attachRelIdxs; + Relation *attachrelIdxRels; + IndexInfo **attachInfos; + int i; + ListCell *cell; + + idxes = RelationGetIndexList(rel); + attachRelIdxs = RelationGetIndexList(attachrel); + attachrelIdxRels = palloc(sizeof(Relation) * list_length(attachRelIdxs)); + attachInfos = palloc(sizeof(IndexInfo *) * list_length(attachRelIdxs)); + + /* Build arrays of all existing indexes and their IndexInfos */ + i = 0; + foreach(cell, attachRelIdxs) + { + Oid cldIdxId = lfirst_oid(cell); + + attachrelIdxRels[i] = index_open(cldIdxId, AccessShareLock); + attachInfos[i] = BuildIndexInfo(attachrelIdxRels[i]); + i++; + } + + /* + * For each index on the partitioned table, find a matching one in the + * partition-to-be; if one is not found, create one. + */ + foreach(cell, idxes) + { + Oid idx = lfirst_oid(cell); + Relation idxRel = index_open(idx, AccessShareLock); + IndexInfo *info; + bool found = false; + + if (idxRel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX) + { + index_close(idxRel, AccessShareLock); + continue; + } + info = BuildIndexInfo(idxRel); + if (attmap == NULL) + attmap = + convert_tuples_by_name_map(RelationGetDescr(attachrel), + RelationGetDescr(rel), + gettext_noop("could not convert row type")); + + for (i = 0; i < list_length(attachRelIdxs); i++) + { + /* already used it */ + if (has_superclass(RelationGetRelid(attachrelIdxRels[i]))) + continue; + + if (CompareIndexInfo(info, attachInfos[i], attmap)) + { + /* bingo. */ + IndexSetParentIndex(attachrelIdxRels[i], idx); + found = true; + break; + } + } + if (!found) + { + IndexStmt *stmt; + + stmt = generateClonedIndexStmt(NULL, RelationGetRelid(attachrel), + idxRel, attmap, + RelationGetDescr(rel)->natts); + DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid, + RelationGetRelid(idxRel), + false, false, false, false, false); + } + + index_close(idxRel, AccessShareLock); + } + + /* Clean up. */ + if (attmap) + pfree(attmap); + + for (i = 0; i < list_length(attachRelIdxs); i++) + { + pfree(attachInfos[i]); + index_close(attachrelIdxRels[i], AccessShareLock); + } + + if (idxes) + pfree(idxes); + if (attachRelIdxs) + pfree(attachRelIdxs); + } + + /* * Generate partition constraint from the partition bound specification. * If the parent itself is a partition, make sure to include its * constraint as well. @@ -14033,6 +14210,8 @@ ATExecDetachPartition(Relation rel, RangeVar *name) new_repl[Natts_pg_class]; ObjectAddress address; Oid defaultPartOid; + List *indexes; + ListCell *cell; /* * We must lock the default partition, because detaching this partition @@ -14094,6 +14273,25 @@ ATExecDetachPartition(Relation rel, RangeVar *name) } } + /* detach indexes too */ + indexes = RelationGetIndexList(partRel); + foreach(cell, indexes) + { + Oid idxid = lfirst_oid(cell); + Relation idx; + + if (!has_superclass(idxid)) + continue; + + Assert(!has_superclass(idxid) || + (IndexGetRelation(get_partition_parent(idxid), false) == + RelationGetRelid(rel))); + + idx = index_open(idxid, AccessExclusiveLock); + IndexSetParentIndex(idx, InvalidOid); + relation_close(idx, AccessExclusiveLock); + } + /* * Invalidate the parent's relcache so that the partition is no longer * included in its partition descriptor. @@ -14107,3 +14305,276 @@ ATExecDetachPartition(Relation rel, RangeVar *name) return address; } + +/* + * Before acquiring lock on an index, acquire the same lock on the owning + * table. + */ +struct AttachIndexCallbackState +{ + Oid partitionOid; + Oid parentTblOid; + bool lockedParentTbl; +}; + +static void +RangeVarCallbackForAttachIndex(const RangeVar *rv, Oid relOid, Oid oldRelOid, + void *arg) +{ + struct AttachIndexCallbackState *state; + Form_pg_class classform; + HeapTuple tuple; + + state = (struct AttachIndexCallbackState *) arg; + + if (!state->lockedParentTbl) + { + LockRelationOid(state->parentTblOid, AccessShareLock); + state->lockedParentTbl = true; + } + + /* + * If we previously locked some other heap, and the name we're looking up + * no longer refers to an index on that relation, release the now-useless + * lock. XXX maybe we should do *after* we verify whether the index does + * not actually belong to the same relation ... + */ + if (relOid != oldRelOid && OidIsValid(state->partitionOid)) + { + UnlockRelationOid(state->partitionOid, AccessShareLock); + state->partitionOid = InvalidOid; + } + + /* Didn't find a relation, so no need for locking or permission checks. */ + if (!OidIsValid(relOid)) + return; + + tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid)); + if (!HeapTupleIsValid(tuple)) + return; /* concurrently dropped, so nothing to do */ + classform = (Form_pg_class) GETSTRUCT(tuple); + if (classform->relkind != RELKIND_PARTITIONED_INDEX && + classform->relkind != RELKIND_INDEX) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("\"%s\" is not an index", rv->relname))); + ReleaseSysCache(tuple); + + /* + * Since we need only examine the heap's tupledesc, an access share lock + * on it (preventing any DDL) is sufficient. + */ + state->partitionOid = IndexGetRelation(relOid, false); + LockRelationOid(state->partitionOid, AccessShareLock); +} + +/* + * ALTER INDEX i1 ATTACH PARTITION i2 + */ +static ObjectAddress +ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name) +{ + Relation partIdx; + Relation partTbl; + Relation parentTbl; + ObjectAddress address; + Oid partIdxId; + Oid currParent; + struct AttachIndexCallbackState state; + + /* + * We need to obtain lock on the index 'name' to modify it, but we also + * need to read its owning table's tuple descriptor -- so we need to lock + * both. To avoid deadlocks, obtain lock on the table before doing so on + * the index. Furthermore, we need to examine the parent table of the + * partition, so lock that one too. + */ + state.partitionOid = InvalidOid; + state.parentTblOid = parentIdx->rd_index->indrelid; + state.lockedParentTbl = false; + partIdxId = + RangeVarGetRelidExtended(name, AccessExclusiveLock, false, false, + RangeVarCallbackForAttachIndex, + (void *) &state); + /* Not there? */ + if (!OidIsValid(partIdxId)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("index \"%s\" does not exist", name->relname))); + + /* no deadlock risk: our callback above already acquired the lock */ + partIdx = relation_open(partIdxId, AccessExclusiveLock); + + /* we already hold lock on both tables, so this is safe: */ + parentTbl = relation_open(parentIdx->rd_index->indrelid, AccessShareLock); + partTbl = relation_open(partIdx->rd_index->indrelid, NoLock); + + ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partIdx)); + + /* Silently do nothing if already the right state */ + currParent = !has_superclass(partIdxId) ? InvalidOid : + get_partition_parent(partIdxId); + if (currParent != RelationGetRelid(parentIdx)) + { + IndexInfo *childInfo; + IndexInfo *parentInfo; + AttrNumber *attmap; + bool found; + int i; + PartitionDesc partDesc; + + /* + * If this partition already has an index attached, refuse the operation. + */ + refuseDupeIndexAttach(parentIdx, partIdx, partTbl); + + if (OidIsValid(currParent)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("Index \"%s\" is already attached to another index.", + RelationGetRelationName(partIdx)))); + + /* Make sure it indexes a partition of the other index's table */ + partDesc = RelationGetPartitionDesc(parentTbl); + found = false; + for (i = 0; i < partDesc->nparts; i++) + { + if (partDesc->oids[i] == state.partitionOid) + { + found = true; + break; + } + } + if (!found) + ereport(ERROR, + (errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("Index \"%s\" is not on a partition of table \"%s\".", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentTbl)))); + + /* Ensure the indexes are compatible */ + childInfo = BuildIndexInfo(partIdx); + parentInfo = BuildIndexInfo(parentIdx); + attmap = convert_tuples_by_name_map(RelationGetDescr(parentTbl), + RelationGetDescr(partTbl), + gettext_noop("could not convert row type")); + if (!CompareIndexInfo(parentInfo, childInfo, attmap)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("The index definitions do not match."))); + + /* All good -- do it */ + IndexSetParentIndex(partIdx, RelationGetRelid(parentIdx)); + pfree(attmap); + + CommandCounterIncrement(); + + validatePartitionedIndex(parentIdx, parentTbl); + } + + relation_close(parentTbl, AccessShareLock); + /* keep these locks till commit */ + relation_close(partTbl, NoLock); + relation_close(partIdx, NoLock); + + return address; +} + +/* + * Verify whether the given partition already contains an index attached + * to the given partitioned index. If so, raise an error. + */ +static void +refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, Relation partitionTbl) +{ + Relation pg_inherits; + ScanKeyData key; + HeapTuple tuple; + SysScanDesc scan; + + pg_inherits = heap_open(InheritsRelationId, AccessShareLock); + ScanKeyInit(&key, Anum_pg_inherits_inhparent, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(parentIdx))); + scan = systable_beginscan(pg_inherits, InheritsParentIndexId, true, + NULL, 1, &key); + while (HeapTupleIsValid(tuple = systable_getnext(scan))) + { + Form_pg_inherits inhForm; + Oid tab; + + inhForm = (Form_pg_inherits) GETSTRUCT(tuple); + tab = IndexGetRelation(inhForm->inhrelid, false); + if (tab == RelationGetRelid(partitionTbl)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", + RelationGetRelationName(partIdx), + RelationGetRelationName(parentIdx)), + errdetail("Another index is already attached for partition \"%s\".", + RelationGetRelationName(partitionTbl)))); + } + + systable_endscan(scan); + heap_close(pg_inherits, AccessShareLock); +} + +/* + * Verify whether the set of attached partition indexes to a parent index on + * a partitioned table is complete. If it is, mark the parent index valid. + * + * This should be called each time a partition index is attached. + */ +static void +validatePartitionedIndex(Relation partedIdx, Relation partedTbl) +{ + Relation idxRel; + Relation inheritsRel; + SysScanDesc scan; + ScanKeyData key; + int tuples = 0; + HeapTuple tuple; + PartitionDesc partDesc; + + Assert(partedIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX); + + /* scan pg_inherits counting indexes */ + inheritsRel = heap_open(InheritsRelationId, AccessShareLock); + ScanKeyInit(&key, Anum_pg_inherits_inhparent, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(partedIdx))); + scan = systable_beginscan(inheritsRel, InheritsParentIndexId, true, + NULL, 1, &key); + while ((tuple = systable_getnext(scan)) != NULL) + tuples += 1; + systable_endscan(scan); + + /* + * If we found as many inherited indexes as the partitioned table has + * partitions, we're good; update pg_index to set indisvalid. + */ + partDesc = RelationGetPartitionDesc(partedTbl); + if (tuples == partDesc->nparts) + { + HeapTuple newtup; + + idxRel = heap_open(IndexRelationId, RowExclusiveLock); + + newtup = heap_copytuple(partedIdx->rd_indextuple); + ((Form_pg_index) GETSTRUCT(newtup))->indisvalid = true; + + CatalogTupleUpdate(idxRel, &partedIdx->rd_indextuple->t_self, newtup); + + heap_close(idxRel, RowExclusiveLock); + } + + heap_close(inheritsRel, AccessShareLock); +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index ddbbc79823..65d8c77d7a 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -3379,6 +3379,7 @@ _copyIndexStmt(const IndexStmt *from) COPY_STRING_FIELD(idxname); COPY_NODE_FIELD(relation); + COPY_SCALAR_FIELD(relationId); COPY_STRING_FIELD(accessMethod); COPY_STRING_FIELD(tableSpace); COPY_NODE_FIELD(indexParams); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 30ccc9c5ae..0bd12e862e 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1332,6 +1332,7 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b) { COMPARE_STRING_FIELD(idxname); COMPARE_NODE_FIELD(relation); + COMPARE_SCALAR_FIELD(relationId); COMPARE_STRING_FIELD(accessMethod); COMPARE_STRING_FIELD(tableSpace); COMPARE_NODE_FIELD(indexParams); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 5e72df137e..b1cdfc36a6 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2650,6 +2650,7 @@ _outIndexStmt(StringInfo str, const IndexStmt *node) WRITE_STRING_FIELD(idxname); WRITE_NODE_FIELD(relation); + WRITE_OID_FIELD(relationId); WRITE_STRING_FIELD(accessMethod); WRITE_STRING_FIELD(tableSpace); WRITE_NODE_FIELD(indexParams); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 8c60b35068..5f263395c5 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -372,7 +372,12 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, * a table, except we can be sure that the index is not larger * than the table. */ - if (info->indpred == NIL) + if (indexRelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + { + info->pages = 0; + info->tuples = 0; + } + else if (info->indpred == NIL) { info->pages = RelationGetNumberOfBlocks(indexRelation); info->tuples = rel->tuples; @@ -387,7 +392,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, info->tuples = rel->tuples; } - if (info->relam == BTREE_AM_OID) + if (indexRelation->rd_rel->relkind == RELKIND_INDEX && + info->relam == BTREE_AM_OID) { /* For btrees, get tree height while we have the index open */ info->tree_height = _bt_getrootheight(indexRelation); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 16923e853a..04ef741a4c 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -290,7 +290,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type add_drop opt_asc_desc opt_nulls_order %type alter_table_cmd alter_type_cmd opt_collate_clause - replica_identity partition_cmd + replica_identity partition_cmd index_partition_cmd %type alter_table_cmds alter_type_cmds %type alter_identity_column_option_list %type alter_identity_column_option @@ -1891,6 +1891,15 @@ AlterTableStmt: n->missing_ok = true; $$ = (Node *)n; } + | ALTER INDEX qualified_name index_partition_cmd + { + AlterTableStmt *n = makeNode(AlterTableStmt); + n->relation = $3; + n->cmds = list_make1($4); + n->relkind = OBJECT_INDEX; + n->missing_ok = false; + $$ = (Node *)n; + } | ALTER INDEX ALL IN_P TABLESPACE name SET TABLESPACE name opt_nowait { AlterTableMoveAllStmt *n = @@ -2025,6 +2034,22 @@ partition_cmd: } ; +index_partition_cmd: + /* ALTER INDEX ATTACH PARTITION */ + ATTACH PARTITION qualified_name + { + AlterTableCmd *n = makeNode(AlterTableCmd); + PartitionCmd *cmd = makeNode(PartitionCmd); + + n->subtype = AT_AttachPartition; + cmd->name = $3; + cmd->bound = NULL; + n->def = (Node *) cmd; + + $$ = (Node *) n; + } + ; + alter_table_cmd: /* ALTER TABLE ADD */ ADD_P columnDef @@ -7330,7 +7355,7 @@ defacl_privilege_target: *****************************************************************************/ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name - ON qualified_name access_method_clause '(' index_params ')' + ON relation_expr access_method_clause '(' index_params ')' opt_reloptions OptTableSpace where_clause { IndexStmt *n = makeNode(IndexStmt); @@ -7338,6 +7363,7 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name n->concurrent = $4; n->idxname = $5; n->relation = $7; + n->relationId = InvalidOid; n->accessMethod = $8; n->indexParams = $10; n->options = $12; @@ -7356,7 +7382,7 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name $$ = (Node *)n; } | CREATE opt_unique INDEX opt_concurrently IF_P NOT EXISTS index_name - ON qualified_name access_method_clause '(' index_params ')' + ON relation_expr access_method_clause '(' index_params ')' opt_reloptions OptTableSpace where_clause { IndexStmt *n = makeNode(IndexStmt); diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 128f1679c6..90bb356df8 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -118,9 +118,6 @@ static void transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_clause); static void transformOfType(CreateStmtContext *cxt, TypeName *ofTypename); -static IndexStmt *generateClonedIndexStmt(CreateStmtContext *cxt, - Relation source_idx, - const AttrNumber *attmap, int attmap_length); static List *get_collation(Oid collation, Oid actual_datatype); static List *get_opclass(Oid opclass, Oid actual_datatype); static void transformIndexConstraints(CreateStmtContext *cxt); @@ -1185,7 +1182,8 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla parent_index = index_open(parent_index_oid, AccessShareLock); /* Build CREATE INDEX statement to recreate the parent_index */ - index_stmt = generateClonedIndexStmt(cxt, parent_index, + index_stmt = generateClonedIndexStmt(cxt->relation, InvalidOid, + parent_index, attmap, tupleDesc->natts); /* Copy comment on index, if requested */ @@ -1263,10 +1261,12 @@ transformOfType(CreateStmtContext *cxt, TypeName *ofTypename) /* * Generate an IndexStmt node using information from an already existing index - * "source_idx". Attribute numbers should be adjusted according to attmap. + * "source_idx", for the rel identified either by heapRel or heapRelid. + * + * Attribute numbers should be adjusted according to attmap. */ -static IndexStmt * -generateClonedIndexStmt(CreateStmtContext *cxt, Relation source_idx, +IndexStmt * +generateClonedIndexStmt(RangeVar *heapRel, Oid heapRelid, Relation source_idx, const AttrNumber *attmap, int attmap_length) { Oid source_relid = RelationGetRelid(source_idx); @@ -1287,6 +1287,9 @@ generateClonedIndexStmt(CreateStmtContext *cxt, Relation source_idx, Datum datum; bool isnull; + Assert((heapRel == NULL && OidIsValid(heapRelid)) || + (heapRel != NULL && !OidIsValid(heapRelid))); + /* * Fetch pg_class tuple of source index. We can't use the copy in the * relcache entry because it doesn't include optional fields. @@ -1322,7 +1325,8 @@ generateClonedIndexStmt(CreateStmtContext *cxt, Relation source_idx, /* Begin building the IndexStmt */ index = makeNode(IndexStmt); - index->relation = cxt->relation; + index->relation = heapRel; + index->relationId = heapRelid; index->accessMethod = pstrdup(NameStr(amrec->amname)); if (OidIsValid(idxrelrec->reltablespace)) index->tableSpace = get_tablespace_name(idxrelrec->reltablespace); @@ -3289,18 +3293,39 @@ transformPartitionCmd(CreateStmtContext *cxt, PartitionCmd *cmd) { Relation parentRel = cxt->rel; - /* the table must be partitioned */ - if (parentRel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) - ereport(ERROR, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("\"%s\" is not partitioned", - RelationGetRelationName(parentRel)))); - - /* transform the partition bound, if any */ - Assert(RelationGetPartitionKey(parentRel) != NULL); - if (cmd->bound != NULL) - cxt->partbound = transformPartitionBound(cxt->pstate, parentRel, - cmd->bound); + switch (parentRel->rd_rel->relkind) + { + case RELKIND_PARTITIONED_TABLE: + /* transform the partition bound, if any */ + Assert(RelationGetPartitionKey(parentRel) != NULL); + if (cmd->bound != NULL) + cxt->partbound = transformPartitionBound(cxt->pstate, parentRel, + cmd->bound); + break; + case RELKIND_PARTITIONED_INDEX: + /* nothing to check */ + Assert(cmd->bound == NULL); + break; + case RELKIND_RELATION: + /* the table must be partitioned */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("table \"%s\" is not partitioned", + RelationGetRelationName(parentRel)))); + break; + case RELKIND_INDEX: + /* the index must be partitioned */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("index \"%s\" is not partitioned", + RelationGetRelationName(parentRel)))); + break; + default: + /* parser shouldn't let this case through */ + elog(ERROR, "\"%s\" is not a partitioned table or index", + RelationGetRelationName(parentRel)); + break; + } } /* diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index ec98a612ec..16c4f8fad4 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -23,6 +23,7 @@ #include "access/xlog.h" #include "catalog/catalog.h" #include "catalog/namespace.h" +#include "catalog/pg_inherits_fn.h" #include "catalog/toasting.h" #include "commands/alter.h" #include "commands/async.h" @@ -1300,6 +1301,7 @@ ProcessUtilitySlow(ParseState *pstate, IndexStmt *stmt = (IndexStmt *) parsetree; Oid relid; LOCKMODE lockmode; + List *inheritors = NIL; if (stmt->concurrent) PreventTransactionChain(isTopLevel, @@ -1321,6 +1323,9 @@ ProcessUtilitySlow(ParseState *pstate, false, false, RangeVarCallbackOwnsRelation, NULL); + /* Also, lock any descendant tables if recursive */ + if (stmt->relation->inh) + inheritors = find_all_inheritors(relid, lockmode, NULL); /* Run parse analysis ... */ stmt = transformIndexStmt(relid, stmt, queryString); @@ -1331,6 +1336,7 @@ ProcessUtilitySlow(ParseState *pstate, DefineIndex(relid, /* OID of heap relation */ stmt, InvalidOid, /* no predefined OID */ + InvalidOid, /* no parent index */ false, /* is_alter_table */ true, /* check_rights */ true, /* check_not_in_use */ @@ -1346,6 +1352,9 @@ ProcessUtilitySlow(ParseState *pstate, parsetree); commandCollected = true; EventTriggerAlterTableEnd(); + + if (inheritors) + list_free(inheritors); } break; diff --git a/src/backend/utils/adt/amutils.c b/src/backend/utils/adt/amutils.c index a6d8feea5b..0f7ceb62eb 100644 --- a/src/backend/utils/adt/amutils.c +++ b/src/backend/utils/adt/amutils.c @@ -183,7 +183,8 @@ indexam_property(FunctionCallInfo fcinfo, if (!HeapTupleIsValid(tuple)) PG_RETURN_NULL(); rd_rel = (Form_pg_class) GETSTRUCT(tuple); - if (rd_rel->relkind != RELKIND_INDEX) + if (rd_rel->relkind != RELKIND_INDEX && + rd_rel->relkind != RELKIND_PARTITIONED_INDEX) { ReleaseSysCache(tuple); PG_RETURN_NULL(); diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 9cdbb06add..c5f5a1ca3f 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -317,7 +317,7 @@ static void decompile_column_index_array(Datum column_index_array, Oid relId, static char *pg_get_ruledef_worker(Oid ruleoid, int prettyFlags); static char *pg_get_indexdef_worker(Oid indexrelid, int colno, const Oid *excludeOps, - bool attrsOnly, bool showTblSpc, + bool attrsOnly, bool showTblSpc, bool inherits, int prettyFlags, bool missing_ok); static char *pg_get_statisticsobj_worker(Oid statextid, bool missing_ok); static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags, @@ -1086,7 +1086,7 @@ pg_get_indexdef(PG_FUNCTION_ARGS) prettyFlags = PRETTYFLAG_INDENT; - res = pg_get_indexdef_worker(indexrelid, 0, NULL, false, false, + res = pg_get_indexdef_worker(indexrelid, 0, NULL, false, false, false, prettyFlags, true); if (res == NULL) @@ -1107,7 +1107,7 @@ pg_get_indexdef_ext(PG_FUNCTION_ARGS) prettyFlags = pretty ? PRETTYFLAG_PAREN | PRETTYFLAG_INDENT : PRETTYFLAG_INDENT; res = pg_get_indexdef_worker(indexrelid, colno, NULL, colno != 0, false, - prettyFlags, true); + false, prettyFlags, true); if (res == NULL) PG_RETURN_NULL(); @@ -1123,7 +1123,7 @@ pg_get_indexdef_ext(PG_FUNCTION_ARGS) char * pg_get_indexdef_string(Oid indexrelid) { - return pg_get_indexdef_worker(indexrelid, 0, NULL, false, true, 0, false); + return pg_get_indexdef_worker(indexrelid, 0, NULL, false, true, true, 0, false); } /* Internal version that just reports the column definitions */ @@ -1133,7 +1133,7 @@ pg_get_indexdef_columns(Oid indexrelid, bool pretty) int prettyFlags; prettyFlags = pretty ? PRETTYFLAG_PAREN | PRETTYFLAG_INDENT : PRETTYFLAG_INDENT; - return pg_get_indexdef_worker(indexrelid, 0, NULL, true, false, + return pg_get_indexdef_worker(indexrelid, 0, NULL, true, false, false, prettyFlags, false); } @@ -1146,7 +1146,7 @@ pg_get_indexdef_columns(Oid indexrelid, bool pretty) static char * pg_get_indexdef_worker(Oid indexrelid, int colno, const Oid *excludeOps, - bool attrsOnly, bool showTblSpc, + bool attrsOnly, bool showTblSpc, bool inherits, int prettyFlags, bool missing_ok) { /* might want a separate isConstraint parameter later */ @@ -1259,9 +1259,11 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, if (!attrsOnly) { if (!isConstraint) - appendStringInfo(&buf, "CREATE %sINDEX %s ON %s USING %s (", + appendStringInfo(&buf, "CREATE %sINDEX %s ON %s%s USING %s (", idxrec->indisunique ? "UNIQUE " : "", quote_identifier(NameStr(idxrelrec->relname)), + idxrelrec->relkind == RELKIND_PARTITIONED_INDEX + && !inherits ? "ONLY " : "", generate_relation_name(indrelid, NIL), quote_identifier(NameStr(amrec->amname))); else /* currently, must be EXCLUDE constraint */ @@ -2148,6 +2150,7 @@ pg_get_constraintdef_worker(Oid constraintId, bool fullCommand, operators, false, false, + false, prettyFlags, false)); break; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 28a4483434..a28046f857 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -430,6 +430,7 @@ static void RelationParseRelOptions(Relation relation, HeapTuple tuple) { bytea *options; + bool isindex; relation->rd_options = NULL; @@ -439,6 +440,7 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple) case RELKIND_RELATION: case RELKIND_TOASTVALUE: case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: case RELKIND_VIEW: case RELKIND_MATVIEW: case RELKIND_PARTITIONED_TABLE: @@ -452,10 +454,12 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple) * we might not have any other for pg_class yet (consider executing this * code for pg_class itself) */ + isindex = relation->rd_rel->relkind == RELKIND_INDEX || + relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX; options = extractRelOptions(tuple, GetPgClassDescriptor(), - relation->rd_rel->relkind == RELKIND_INDEX ? - relation->rd_amroutine->amoptions : NULL); + isindex ? relation->rd_amroutine->amoptions : + NULL); /* * Copy parsed data into CacheMemoryContext. To guard against the @@ -2053,7 +2057,8 @@ RelationIdGetRelation(Oid relationId) * and we don't want to use the full-blown procedure because it's * a headache for indexes that reload itself depends on. */ - if (rd->rd_rel->relkind == RELKIND_INDEX) + if (rd->rd_rel->relkind == RELKIND_INDEX || + rd->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) RelationReloadIndexInfo(rd); else RelationClearRelation(rd, true); @@ -2167,7 +2172,8 @@ RelationReloadIndexInfo(Relation relation) Form_pg_class relp; /* Should be called only for invalidated indexes */ - Assert(relation->rd_rel->relkind == RELKIND_INDEX && + Assert((relation->rd_rel->relkind == RELKIND_INDEX || + relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) && !relation->rd_isvalid); /* Ensure it's closed at smgr level */ @@ -2387,7 +2393,8 @@ RelationClearRelation(Relation relation, bool rebuild) { RelationInitPhysicalAddr(relation); - if (relation->rd_rel->relkind == RELKIND_INDEX) + if (relation->rd_rel->relkind == RELKIND_INDEX || + relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) { relation->rd_isvalid = false; /* needs to be revalidated */ if (relation->rd_refcnt > 1 && IsTransactionState()) @@ -2403,7 +2410,8 @@ RelationClearRelation(Relation relation, bool rebuild) * re-read the pg_class row to handle possible physical relocation of the * index, and we check for pg_index updates too. */ - if (relation->rd_rel->relkind == RELKIND_INDEX && + if ((relation->rd_rel->relkind == RELKIND_INDEX || + relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) && relation->rd_refcnt > 0 && relation->rd_indexcxt != NULL) { @@ -5461,7 +5469,10 @@ load_relcache_init_file(bool shared) rel->rd_att->constr = constr; } - /* If it's an index, there's more to do */ + /* + * If it's an index, there's more to do. Note we explicitly ignore + * partitioned indexes here. + */ if (rel->rd_rel->relkind == RELKIND_INDEX) { MemoryContext indexcxt; @@ -5825,7 +5836,10 @@ write_relcache_init_file(bool shared) (rel->rd_options ? VARSIZE(rel->rd_options) : 0), fp); - /* If it's an index, there's more to do */ + /* + * If it's an index, there's more to do. Note we explicitly ignore + * partitioned indexes here. + */ if (rel->rd_rel->relkind == RELKIND_INDEX) { /* write the pg_index tuple */ diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index 7f5f351486..9483053680 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -68,6 +68,7 @@ static int numextmembers; static void flagInhTables(Archive *fout, TableInfo *tbinfo, int numTables, InhInfo *inhinfo, int numInherits); +static void flagInhIndexes(Archive *fout, TableInfo *tblinfo, int numTables); static void flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables); static DumpableObject **buildIndexArray(void *objArray, int numObjs, Size objSize); @@ -76,6 +77,8 @@ static int ExtensionMemberIdCompare(const void *p1, const void *p2); static void findParentsByOid(TableInfo *self, InhInfo *inhinfo, int numInherits); static int strInArray(const char *pattern, char **arr, int arr_size); +static IndxInfo *findIndexByOid(Oid oid, DumpableObject **idxinfoindex, + int numIndexes); /* @@ -258,6 +261,10 @@ getSchemaData(Archive *fout, int *numTablesPtr) getIndexes(fout, tblinfo, numTables); if (g_verbose) + write_msg(NULL, "flagging indexes in partitioned tables\n"); + flagInhIndexes(fout, tblinfo, numTables); + + if (g_verbose) write_msg(NULL, "reading extended statistics\n"); getExtendedStatistics(fout, tblinfo, numTables); @@ -354,6 +361,89 @@ flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables, } } +/* + * flagInhIndexes - + * Create AttachIndexInfo objects for partitioned indexes, and add + * appropriate dependency links. + */ +static void +flagInhIndexes(Archive *fout, TableInfo tblinfo[], int numTables) +{ + int i, + j, + k; + DumpableObject ***parentIndexArray; + + parentIndexArray = (DumpableObject ***) + pg_malloc0(getMaxDumpId() * sizeof(DumpableObject **)); + + for (i = 0; i < numTables; i++) + { + TableInfo *parenttbl; + IndexAttachInfo *attachinfo; + + if (!tblinfo[i].ispartition || tblinfo[i].numParents == 0) + continue; + + Assert(tblinfo[i].numParents == 1); + parenttbl = tblinfo[i].parents[0]; + + /* + * We need access to each parent table's index list, but there is no + * index to cover them outside of this function. To avoid having to + * sort every parent table's indexes each time we come across each of + * its partitions, create an indexed array for each parent the first + * time it is required. + */ + if (parentIndexArray[parenttbl->dobj.dumpId] == NULL) + parentIndexArray[parenttbl->dobj.dumpId] = + buildIndexArray(parenttbl->indexes, + parenttbl->numIndexes, + sizeof(IndxInfo)); + + attachinfo = (IndexAttachInfo *) + pg_malloc0(tblinfo[i].numIndexes * sizeof(IndexAttachInfo)); + for (j = 0, k = 0; j < tblinfo[i].numIndexes; j++) + { + IndxInfo *index = &(tblinfo[i].indexes[j]); + IndxInfo *parentidx; + + if (index->parentidx == 0) + continue; + + parentidx = findIndexByOid(index->parentidx, + parentIndexArray[parenttbl->dobj.dumpId], + parenttbl->numIndexes); + if (parentidx == NULL) + continue; + + attachinfo[k].dobj.objType = DO_INDEX_ATTACH; + attachinfo[k].dobj.catId.tableoid = 0; + attachinfo[k].dobj.catId.oid = 0; + AssignDumpId(&attachinfo[k].dobj); + attachinfo[k].dobj.name = pg_strdup(index->dobj.name); + attachinfo[k].parentIdx = parentidx; + attachinfo[k].partitionIdx = index; + + /* + * We want dependencies from parent to partition (so that the + * partition index is created first), and another one from + * attach object to parent (so that the partition index is + * attached once the parent index has been created). + */ + addObjectDependency(&parentidx->dobj, index->dobj.dumpId); + addObjectDependency(&attachinfo[k].dobj, parentidx->dobj.dumpId); + + k++; + } + } + + for (i = 0; i < numTables; i++) + if (parentIndexArray[i]) + pg_free(parentIndexArray[i]); + pg_free(parentIndexArray); +} + /* flagInhAttrs - * for each dumpable table in tblinfo, flag its inherited attributes * @@ -827,6 +917,18 @@ findExtensionByOid(Oid oid) return (ExtensionInfo *) findObjectByOid(oid, extinfoindex, numExtensions); } +/* + * findIndexByOid + * find the entry of the index with the given oid + * + * This one's signature is different from the previous ones because we lack a + * global array of all indexes, so caller must pass their array as argument. + */ +static IndxInfo * +findIndexByOid(Oid oid, DumpableObject **idxinfoindex, int numIndexes) +{ + return (IndxInfo *) findObjectByOid(oid, idxinfoindex, numIndexes); +} /* * setExtensionMembership diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 27628a397c..92b29e2e5f 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -193,6 +193,7 @@ static void dumpAttrDef(Archive *fout, AttrDefInfo *adinfo); static void dumpSequence(Archive *fout, TableInfo *tbinfo); static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo); static void dumpIndex(Archive *fout, IndxInfo *indxinfo); +static void dumpIndexAttach(Archive *fout, IndexAttachInfo *attachinfo); static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo); static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo); static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo); @@ -6509,6 +6510,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) int i_tableoid, i_oid, i_indexname, + i_parentidx, i_indexdef, i_indnkeys, i_indkey, @@ -6530,10 +6532,6 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) { TableInfo *tbinfo = &tblinfo[i]; - /* Only plain tables and materialized views have indexes. */ - if (tbinfo->relkind != RELKIND_RELATION && - tbinfo->relkind != RELKIND_MATVIEW) - continue; if (!tbinfo->hasindex) continue; @@ -6561,7 +6559,39 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) * is not. */ resetPQExpBuffer(query); - if (fout->remoteVersion >= 90400) + if (fout->remoteVersion >= 11000) + { + appendPQExpBuffer(query, + "SELECT t.tableoid, t.oid, " + "t.relname AS indexname, " + "inh.inhparent AS parentidx, " + "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " + "t.relnatts AS indnkeys, " + "i.indkey, i.indisclustered, " + "i.indisreplident, t.relpages, " + "c.contype, c.conname, " + "c.condeferrable, c.condeferred, " + "c.tableoid AS contableoid, " + "c.oid AS conoid, " + "pg_catalog.pg_get_constraintdef(c.oid, false) AS condef, " + "(SELECT spcname FROM pg_catalog.pg_tablespace s WHERE s.oid = t.reltablespace) AS tablespace, " + "t.reloptions AS indreloptions " + "FROM pg_catalog.pg_index i " + "JOIN pg_catalog.pg_class t ON (t.oid = i.indexrelid) " + "JOIN pg_catalog.pg_class t2 ON (t2.oid = i.indrelid) " + "LEFT JOIN pg_catalog.pg_constraint c " + "ON (i.indrelid = c.conrelid AND " + "i.indexrelid = c.conindid AND " + "c.contype IN ('p','u','x')) " + "LEFT JOIN pg_catalog.pg_inherits inh " + "ON (inh.inhrelid = indexrelid) " + "WHERE i.indrelid = '%u'::pg_catalog.oid " + "AND (i.indisvalid OR t2.relkind = 'p') " + "AND i.indisready " + "ORDER BY indexname", + tbinfo->dobj.catId.oid); + } + else if (fout->remoteVersion >= 90400) { /* * the test on indisready is necessary in 9.2, and harmless in @@ -6570,6 +6600,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) appendPQExpBuffer(query, "SELECT t.tableoid, t.oid, " "t.relname AS indexname, " + "0 AS parentidx, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " @@ -6601,6 +6632,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) appendPQExpBuffer(query, "SELECT t.tableoid, t.oid, " "t.relname AS indexname, " + "0 AS parentidx, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " @@ -6628,6 +6660,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) appendPQExpBuffer(query, "SELECT t.tableoid, t.oid, " "t.relname AS indexname, " + "0 AS parentidx, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " @@ -6658,6 +6691,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) appendPQExpBuffer(query, "SELECT t.tableoid, t.oid, " "t.relname AS indexname, " + "0 AS parentidx, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " "t.relnatts AS indnkeys, " "i.indkey, i.indisclustered, " @@ -6690,6 +6724,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) i_tableoid = PQfnumber(res, "tableoid"); i_oid = PQfnumber(res, "oid"); i_indexname = PQfnumber(res, "indexname"); + i_parentidx = PQfnumber(res, "parentidx"); i_indexdef = PQfnumber(res, "indexdef"); i_indnkeys = PQfnumber(res, "indnkeys"); i_indkey = PQfnumber(res, "indkey"); @@ -6706,8 +6741,10 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) i_tablespace = PQfnumber(res, "tablespace"); i_indreloptions = PQfnumber(res, "indreloptions"); - indxinfo = (IndxInfo *) pg_malloc(ntups * sizeof(IndxInfo)); + tbinfo->indexes = indxinfo = + (IndxInfo *) pg_malloc(ntups * sizeof(IndxInfo)); constrinfo = (ConstraintInfo *) pg_malloc(ntups * sizeof(ConstraintInfo)); + tbinfo->numIndexes = ntups; for (j = 0; j < ntups; j++) { @@ -6729,6 +6766,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) indxinfo[j].indkeys, indxinfo[j].indnkeys); indxinfo[j].indisclustered = (PQgetvalue(res, j, i_indisclustered)[0] == 't'); indxinfo[j].indisreplident = (PQgetvalue(res, j, i_indisreplident)[0] == 't'); + indxinfo[j].parentidx = atooid(PQgetvalue(res, j, i_parentidx)); indxinfo[j].relpages = atoi(PQgetvalue(res, j, i_relpages)); contype = *(PQgetvalue(res, j, i_contype)); @@ -9512,6 +9550,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) case DO_INDEX: dumpIndex(fout, (IndxInfo *) dobj); break; + case DO_INDEX_ATTACH: + dumpIndexAttach(fout, (IndexAttachInfo *) dobj); + break; case DO_STATSEXT: dumpStatisticsExt(fout, (StatsExtInfo *) dobj); break; @@ -16173,6 +16214,42 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo) } /* + * dumpIndexAttach + * write out to fout a partitioned-index attachment clause + */ +void +dumpIndexAttach(Archive *fout, IndexAttachInfo *attachinfo) +{ + if (fout->dopt->dataOnly) + return; + + if (attachinfo->partitionIdx->dobj.dump & DUMP_COMPONENT_DEFINITION) + { + PQExpBuffer q = createPQExpBuffer(); + + appendPQExpBuffer(q, "\nALTER INDEX %s ", + fmtQualifiedId(fout->remoteVersion, + attachinfo->parentIdx->dobj.namespace->dobj.name, + attachinfo->parentIdx->dobj.name)); + appendPQExpBuffer(q, "ATTACH PARTITION %s;\n", + fmtQualifiedId(fout->remoteVersion, + attachinfo->partitionIdx->dobj.namespace->dobj.name, + attachinfo->partitionIdx->dobj.name)); + + ArchiveEntry(fout, attachinfo->dobj.catId, attachinfo->dobj.dumpId, + attachinfo->dobj.name, + NULL, NULL, + "", + false, "INDEX ATTACH", SECTION_POST_DATA, + q->data, "", NULL, + NULL, 0, + NULL, NULL); + + destroyPQExpBuffer(q); + } +} + +/* * dumpStatisticsExt * write out to fout an extended statistics object */ @@ -17803,6 +17880,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, addObjectDependency(postDataBound, dobj->dumpId); break; case DO_INDEX: + case DO_INDEX_ATTACH: case DO_STATSEXT: case DO_REFRESH_MATVIEW: case DO_TRIGGER: diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 49a02b4fa8..6c18d451ef 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -56,6 +56,7 @@ typedef enum DO_TABLE, DO_ATTRDEF, DO_INDEX, + DO_INDEX_ATTACH, DO_STATSEXT, DO_RULE, DO_TRIGGER, @@ -328,6 +329,8 @@ typedef struct _tableInfo */ int numParents; /* number of (immediate) parent tables */ struct _tableInfo **parents; /* TableInfos of immediate parents */ + int numIndexes; /* number of indexes */ + struct _indxInfo *indexes; /* indexes */ struct _tableDataInfo *dataObj; /* TableDataInfo, if dumping its data */ int numTriggers; /* number of triggers for table */ struct _triggerInfo *triggers; /* array of TriggerInfo structs */ @@ -361,11 +364,19 @@ typedef struct _indxInfo Oid *indkeys; bool indisclustered; bool indisreplident; + Oid parentidx; /* if partitioned, parent index OID */ /* if there is an associated constraint object, its dumpId: */ DumpId indexconstraint; int relpages; /* relpages of the underlying table */ } IndxInfo; +typedef struct _indexAttachInfo +{ + DumpableObject dobj; + IndxInfo *parentIdx; /* link to index on partitioned table */ + IndxInfo *partitionIdx; /* link to index on partition */ +} IndexAttachInfo; + typedef struct _statsExtInfo { DumpableObject dobj; diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index 6da1c35a42..5ce3c5d485 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -35,6 +35,10 @@ static const char *modulename = gettext_noop("sorter"); * pg_dump.c; that is, PRE_DATA objects must sort before DO_PRE_DATA_BOUNDARY, * POST_DATA objects must sort after DO_POST_DATA_BOUNDARY, and DATA objects * must sort between them. + * + * Note: sortDataAndIndexObjectsBySize wants to have all DO_TABLE_DATA and + * DO_INDEX objects in contiguous chunks, so do not reuse the values for those + * for other object types. */ static const int dbObjectTypePriority[] = { @@ -53,11 +57,12 @@ static const int dbObjectTypePriority[] = 18, /* DO_TABLE */ 20, /* DO_ATTRDEF */ 28, /* DO_INDEX */ - 29, /* DO_STATSEXT */ - 30, /* DO_RULE */ - 31, /* DO_TRIGGER */ + 29, /* DO_INDEX_ATTACH */ + 30, /* DO_STATSEXT */ + 31, /* DO_RULE */ + 32, /* DO_TRIGGER */ 27, /* DO_CONSTRAINT */ - 32, /* DO_FK_CONSTRAINT */ + 33, /* DO_FK_CONSTRAINT */ 2, /* DO_PROCLANG */ 10, /* DO_CAST */ 23, /* DO_TABLE_DATA */ @@ -69,18 +74,18 @@ static const int dbObjectTypePriority[] = 15, /* DO_TSCONFIG */ 16, /* DO_FDW */ 17, /* DO_FOREIGN_SERVER */ - 32, /* DO_DEFAULT_ACL */ + 33, /* DO_DEFAULT_ACL */ 3, /* DO_TRANSFORM */ 21, /* DO_BLOB */ 25, /* DO_BLOB_DATA */ 22, /* DO_PRE_DATA_BOUNDARY */ 26, /* DO_POST_DATA_BOUNDARY */ - 33, /* DO_EVENT_TRIGGER */ - 38, /* DO_REFRESH_MATVIEW */ - 34, /* DO_POLICY */ - 35, /* DO_PUBLICATION */ - 36, /* DO_PUBLICATION_REL */ - 37 /* DO_SUBSCRIPTION */ + 34, /* DO_EVENT_TRIGGER */ + 39, /* DO_REFRESH_MATVIEW */ + 35, /* DO_POLICY */ + 36, /* DO_PUBLICATION */ + 37, /* DO_PUBLICATION_REL */ + 38 /* DO_SUBSCRIPTION */ }; static DumpId preDataBoundId; @@ -937,6 +942,13 @@ repairDomainConstraintMultiLoop(DumpableObject *domainobj, addObjectDependency(constraintobj, postDataBoundId); } +static void +repairIndexLoop(DumpableObject *partedindex, + DumpableObject *partindex) +{ + removeObjectDependency(partedindex, partindex->dumpId); +} + /* * Fix a dependency loop, or die trying ... * @@ -1099,6 +1111,23 @@ repairDependencyLoop(DumpableObject **loop, return; } + /* index on partitioned table and corresponding index on partition */ + if (nLoop == 2 && + loop[0]->objType == DO_INDEX && + loop[1]->objType == DO_INDEX) + { + if (((IndxInfo *) loop[0])->parentidx == loop[1]->catId.oid) + { + repairIndexLoop(loop[0], loop[1]); + return; + } + else if (((IndxInfo *) loop[1])->parentidx == loop[0]->catId.oid) + { + repairIndexLoop(loop[1], loop[0]); + return; + } + } + /* Indirect loop involving table and attribute default */ if (nLoop > 2) { @@ -1292,6 +1321,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize) "INDEX %s (ID %d OID %u)", obj->name, obj->dumpId, obj->catId.oid); return; + case DO_INDEX_ATTACH: + snprintf(buf, bufsize, + "INDEX ATTACH %s (ID %d)", + obj->name, obj->dumpId); + return; case DO_STATSEXT: snprintf(buf, bufsize, "STATISTICS %s (ID %d OID %u)", diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index f2e62946d8..7a0c2423ac 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -1705,7 +1705,8 @@ describeOneTableDetails(const char *schemaname, appendPQExpBufferStr(&buf, ",\n a.attidentity"); else appendPQExpBufferStr(&buf, ",\n ''::pg_catalog.char AS attidentity"); - if (tableinfo.relkind == RELKIND_INDEX) + if (tableinfo.relkind == RELKIND_INDEX || + tableinfo.relkind == RELKIND_PARTITIONED_INDEX) appendPQExpBufferStr(&buf, ",\n pg_catalog.pg_get_indexdef(a.attrelid, a.attnum, TRUE) AS indexdef"); else appendPQExpBufferStr(&buf, ",\n NULL AS indexdef"); @@ -1766,6 +1767,7 @@ describeOneTableDetails(const char *schemaname, schemaname, relationname); break; case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: if (tableinfo.relpersistence == 'u') printfPQExpBuffer(&title, _("Unlogged index \"%s.%s\""), schemaname, relationname); @@ -1823,7 +1825,8 @@ describeOneTableDetails(const char *schemaname, show_column_details = true; } - if (tableinfo.relkind == RELKIND_INDEX) + if (tableinfo.relkind == RELKIND_INDEX || + tableinfo.relkind == RELKIND_PARTITIONED_INDEX) headers[cols++] = gettext_noop("Definition"); if (tableinfo.relkind == RELKIND_FOREIGN_TABLE && pset.sversion >= 90200) @@ -1834,6 +1837,7 @@ describeOneTableDetails(const char *schemaname, headers[cols++] = gettext_noop("Storage"); if (tableinfo.relkind == RELKIND_RELATION || tableinfo.relkind == RELKIND_INDEX || + tableinfo.relkind == RELKIND_PARTITIONED_INDEX || tableinfo.relkind == RELKIND_MATVIEW || tableinfo.relkind == RELKIND_FOREIGN_TABLE || tableinfo.relkind == RELKIND_PARTITIONED_TABLE) @@ -1906,7 +1910,8 @@ describeOneTableDetails(const char *schemaname, } /* Expression for index column */ - if (tableinfo.relkind == RELKIND_INDEX) + if (tableinfo.relkind == RELKIND_INDEX || + tableinfo.relkind == RELKIND_PARTITIONED_INDEX) printTableAddCell(&cont, PQgetvalue(res, i, 7), false, false); /* FDW options for foreign table column, only for 9.2 or later */ @@ -1930,6 +1935,7 @@ describeOneTableDetails(const char *schemaname, /* Statistics target, if the relkind supports this feature */ if (tableinfo.relkind == RELKIND_RELATION || tableinfo.relkind == RELKIND_INDEX || + tableinfo.relkind == RELKIND_PARTITIONED_INDEX || tableinfo.relkind == RELKIND_MATVIEW || tableinfo.relkind == RELKIND_FOREIGN_TABLE || tableinfo.relkind == RELKIND_PARTITIONED_TABLE) @@ -2021,7 +2027,8 @@ describeOneTableDetails(const char *schemaname, PQclear(result); } - if (tableinfo.relkind == RELKIND_INDEX) + if (tableinfo.relkind == RELKIND_INDEX || + tableinfo.relkind == RELKIND_PARTITIONED_INDEX) { /* Footer information about an index */ PGresult *result; @@ -3397,6 +3404,7 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys " WHEN 's' THEN '%s'" " WHEN " CppAsString2(RELKIND_FOREIGN_TABLE) " THEN '%s'" " WHEN " CppAsString2(RELKIND_PARTITIONED_TABLE) " THEN '%s'" + " WHEN " CppAsString2(RELKIND_PARTITIONED_INDEX) " THEN '%s'" " END as \"%s\",\n" " pg_catalog.pg_get_userbyid(c.relowner) as \"%s\"", gettext_noop("Schema"), @@ -3409,6 +3417,7 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys gettext_noop("special"), gettext_noop("foreign table"), gettext_noop("table"), /* partitioned table */ + gettext_noop("index"), /* partitioned index */ gettext_noop("Type"), gettext_noop("Owner")); @@ -3454,7 +3463,8 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys if (showMatViews) appendPQExpBufferStr(&buf, CppAsString2(RELKIND_MATVIEW) ","); if (showIndexes) - appendPQExpBufferStr(&buf, CppAsString2(RELKIND_INDEX) ","); + appendPQExpBufferStr(&buf, CppAsString2(RELKIND_INDEX) "," + CppAsString2(RELKIND_PARTITIONED_INDEX) ","); if (showSeq) appendPQExpBufferStr(&buf, CppAsString2(RELKIND_SEQUENCE) ","); if (showSystem || pattern) diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index b51098deca..8bc4a194a5 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -412,7 +412,8 @@ static const SchemaQuery Query_for_list_of_indexes = { /* catname */ "pg_catalog.pg_class c", /* selcondition */ - "c.relkind IN (" CppAsString2(RELKIND_INDEX) ")", + "c.relkind IN (" CppAsString2(RELKIND_INDEX) ", " + CppAsString2(RELKIND_PARTITIONED_INDEX) ")", /* viscondition */ "pg_catalog.pg_table_is_visible(c.oid)", /* namespace */ @@ -600,6 +601,23 @@ static const SchemaQuery Query_for_list_of_tmf = { NULL }; +static const SchemaQuery Query_for_list_of_tpm = { + /* catname */ + "pg_catalog.pg_class c", + /* selcondition */ + "c.relkind IN (" CppAsString2(RELKIND_RELATION) ", " + CppAsString2(RELKIND_PARTITIONED_TABLE) ", " + CppAsString2(RELKIND_MATVIEW) ")", + /* viscondition */ + "pg_catalog.pg_table_is_visible(c.oid)", + /* namespace */ + "c.relnamespace", + /* result */ + "pg_catalog.quote_ident(c.relname)", + /* qualresult */ + NULL +}; + static const SchemaQuery Query_for_list_of_tm = { /* catname */ "pg_catalog.pg_class c", @@ -1676,7 +1694,12 @@ psql_completion(const char *text, int start, int end) "UNION SELECT 'ALL IN TABLESPACE'"); /* ALTER INDEX */ else if (Matches3("ALTER", "INDEX", MatchAny)) - COMPLETE_WITH_LIST5("ALTER COLUMN", "OWNER TO", "RENAME TO", "SET", "RESET"); + COMPLETE_WITH_LIST6("ALTER COLUMN", "OWNER TO", "RENAME TO", "SET", + "RESET", "ATTACH PARTITION"); + else if (Matches4("ALTER", "INDEX", MatchAny, "ATTACH")) + COMPLETE_WITH_CONST("PARTITION"); + else if (Matches5("ALTER", "INDEX", MatchAny, "ATTACH", "PARTITION")) + COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexes, NULL); /* ALTER INDEX ALTER COLUMN */ else if (Matches6("ALTER", "INDEX", MatchAny, "ALTER", "COLUMN", MatchAny)) COMPLETE_WITH_CONST("SET STATISTICS"); @@ -2338,10 +2361,13 @@ psql_completion(const char *text, int start, int end) COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_indexes, " UNION SELECT 'ON'" " UNION SELECT 'CONCURRENTLY'"); - /* Complete ... INDEX|CONCURRENTLY [] ON with a list of tables */ + /* + * Complete ... INDEX|CONCURRENTLY [] ON with a list of relations + * that can indexes can be created on + */ else if (TailMatches3("INDEX|CONCURRENTLY", MatchAny, "ON") || TailMatches2("INDEX|CONCURRENTLY", "ON")) - COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tm, NULL); + COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tpm, NULL); /* * Complete CREATE|UNIQUE INDEX CONCURRENTLY with "ON" and existing diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index f1765af4ba..2b2c166266 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201712251 +#define CATALOG_VERSION_NO 201712291 #endif diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index 6f290d5c6f..67d8a20be3 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -49,6 +49,20 @@ * Example: a trigger that's created to enforce a foreign-key constraint * is made internally dependent on the constraint's pg_constraint entry. * + * DEPENDENCY_INTERNAL_AUTO ('I'): the dependent object was created as + * part of creation of the referenced object, and is really just a part + * of its internal implementation. A DROP of the dependent object will + * be disallowed outright (we'll tell the user to issue a DROP against the + * referenced object, instead). While a regular internal dependency will + * prevent the dependent object from being dropped while any such + * dependencies remain, DEPENDENCY_INTERNAL_AUTO will allow such a drop as + * long as the object can be found by following any of such dependencies. + * Example: an index on a partition is made internal-auto-dependent on + * both the partition itself as well as on the index on the parent + * partitioned table; so the partition index is dropped together with + * either partition it indexes, or with the parent index it is attached + * to. + * DEPENDENCY_EXTENSION ('e'): the dependent object is a member of the * extension that is the referenced object. The dependent object can be * dropped only via DROP EXTENSION on the referenced object. Functionally @@ -75,6 +89,7 @@ typedef enum DependencyType DEPENDENCY_NORMAL = 'n', DEPENDENCY_AUTO = 'a', DEPENDENCY_INTERNAL = 'i', + DEPENDENCY_INTERNAL_AUTO = 'I', DEPENDENCY_EXTENSION = 'e', DEPENDENCY_AUTO_EXTENSION = 'x', DEPENDENCY_PIN = 'p' diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 12bf35567a..139365c3b3 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -47,10 +47,13 @@ extern void index_check_primary_key(Relation heapRel, #define INDEX_CREATE_SKIP_BUILD (1 << 2) #define INDEX_CREATE_CONCURRENT (1 << 3) #define INDEX_CREATE_IF_NOT_EXISTS (1 << 4) +#define INDEX_CREATE_PARTITIONED (1 << 5) +#define INDEX_CREATE_INVALID (1 << 6) extern Oid index_create(Relation heapRelation, const char *indexRelationName, Oid indexRelationId, + Oid parentIndexRelid, Oid relFileNode, IndexInfo *indexInfo, List *indexColNames, @@ -84,6 +87,8 @@ extern void index_drop(Oid indexId, bool concurrent); extern IndexInfo *BuildIndexInfo(Relation index); +extern bool CompareIndexInfo(IndexInfo *info1, IndexInfo *info2, AttrNumber *attmap); + extern void BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii); extern void FormIndexDatum(IndexInfo *indexInfo, @@ -134,4 +139,6 @@ extern bool ReindexIsProcessingHeap(Oid heapOid); extern bool ReindexIsProcessingIndex(Oid indexOid); extern Oid IndexGetRelation(Oid indexId, bool missing_ok); +extern void IndexSetParentIndex(Relation idx, Oid parentOid); + #endif /* INDEX_H */ diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index e7049438eb..26b1866c69 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -166,6 +166,7 @@ DESCR(""); #define RELKIND_COMPOSITE_TYPE 'c' /* composite type */ #define RELKIND_FOREIGN_TABLE 'f' /* foreign table */ #define RELKIND_PARTITIONED_TABLE 'p' /* partitioned table */ +#define RELKIND_PARTITIONED_INDEX 'I' /* partitioned index */ #define RELPERSISTENCE_PERMANENT 'p' /* regular table */ #define RELPERSISTENCE_UNLOGGED 'u' /* unlogged permanent table */ diff --git a/src/include/catalog/pg_inherits_fn.h b/src/include/catalog/pg_inherits_fn.h index 405af230d1..b587f2d042 100644 --- a/src/include/catalog/pg_inherits_fn.h +++ b/src/include/catalog/pg_inherits_fn.h @@ -23,5 +23,7 @@ extern List *find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, extern bool has_subclass(Oid relationId); extern bool has_superclass(Oid relationId); extern bool typeInheritsFrom(Oid subclassTypeId, Oid superclassTypeId); +extern void StoreSingleInheritance(Oid relationId, Oid parentOid, + int16 seqNumber); #endif /* PG_INHERITS_FN_H */ diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 1f18cad963..41007162aa 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -25,12 +25,13 @@ extern void RemoveObjects(DropStmt *stmt); extern ObjectAddress DefineIndex(Oid relationId, IndexStmt *stmt, Oid indexRelationId, + Oid parentIndexId, bool is_alter_table, bool check_rights, bool check_not_in_use, bool skip_build, bool quiet); -extern Oid ReindexIndex(RangeVar *indexRelation, int options); +extern void ReindexIndex(RangeVar *indexRelation, int options); extern Oid ReindexTable(RangeVar *relation, int options); extern void ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, int options); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 2a4f7407a1..f2f216e049 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -158,6 +158,7 @@ typedef struct IndexInfo bool ii_ReadyForInserts; bool ii_Concurrent; bool ii_BrokenHotChain; + Oid ii_Am; void *ii_AmCache; MemoryContext ii_Context; } IndexInfo; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index b72178efd1..0296784726 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -839,7 +839,7 @@ typedef struct PartitionRangeDatum } PartitionRangeDatum; /* - * PartitionCmd - info for ALTER TABLE ATTACH/DETACH PARTITION commands + * PartitionCmd - info for ALTER TABLE/INDEX ATTACH/DETACH PARTITION commands */ typedef struct PartitionCmd { @@ -2702,6 +2702,10 @@ typedef struct FetchStmt * index, just a UNIQUE/PKEY constraint using an existing index. isconstraint * must always be true in this case, and the fields describing the index * properties are empty. + * + * The relation to build the index on can be represented either by name + * (in which case the RangeVar indicates whether to recurse or not) or by OID + * (in which case the command is always recursive). * ---------------------- */ typedef struct IndexStmt @@ -2709,6 +2713,7 @@ typedef struct IndexStmt NodeTag type; char *idxname; /* name of new index, or NULL for default */ RangeVar *relation; /* relation to build index on */ + Oid relationId; /* OID of relation to build index on */ char *accessMethod; /* name of access method (eg. btree) */ char *tableSpace; /* tablespace, or NULL for default */ List *indexParams; /* columns to index: a list of IndexElem */ diff --git a/src/include/parser/parse_utilcmd.h b/src/include/parser/parse_utilcmd.h index a7f5e0caea..64aa8234e5 100644 --- a/src/include/parser/parse_utilcmd.h +++ b/src/include/parser/parse_utilcmd.h @@ -27,5 +27,8 @@ extern void transformRuleStmt(RuleStmt *stmt, const char *queryString, extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt); extern PartitionBoundSpec *transformPartitionBound(ParseState *pstate, Relation parent, PartitionBoundSpec *spec); +extern IndexStmt *generateClonedIndexStmt(RangeVar *heapRel, Oid heapOid, + Relation source_idx, + const AttrNumber *attmap, int attmap_length); #endif /* PARSE_UTILCMD_H */ diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index 11f0baa11b..517fb080bd 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -1965,6 +1965,67 @@ create table tab1 (a int, b text); create table tab2 (x int, y tab1); alter table tab1 alter column b type varchar; -- fails ERROR: cannot alter table "tab1" because column "tab2.y" uses its row type +-- Alter column type that's part of a partitioned index +create table at_partitioned (a int, b text) partition by range (a); +create table at_part_1 partition of at_partitioned for values from (0) to (1000); +insert into at_partitioned values (512, '0.123'); +create table at_part_2 (b text, a int); +insert into at_part_2 values ('1.234', 1024); +create index on at_partitioned (b); +create index on at_partitioned (a); +\d at_part_1 + Table "public.at_part_1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | text | | | +Partition of: at_partitioned FOR VALUES FROM (0) TO (1000) +Indexes: + "at_part_1_a_idx" btree (a) + "at_part_1_b_idx" btree (b) + +\d at_part_2 + Table "public.at_part_2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | text | | | + a | integer | | | + +alter table at_partitioned attach partition at_part_2 for values from (1000) to (2000); +\d at_part_2 + Table "public.at_part_2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | text | | | + a | integer | | | +Partition of: at_partitioned FOR VALUES FROM (1000) TO (2000) +Indexes: + "at_part_2_a_idx" btree (a) + "at_part_2_b_idx" btree (b) + +alter table at_partitioned alter column b type numeric using b::numeric; +\d at_part_1 + Table "public.at_part_1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | numeric | | | +Partition of: at_partitioned FOR VALUES FROM (0) TO (1000) +Indexes: + "at_part_1_a_idx" btree (a) + "at_part_1_b_idx" btree (b) + +\d at_part_2 + Table "public.at_part_2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | numeric | | | + a | integer | | | +Partition of: at_partitioned FOR VALUES FROM (1000) TO (2000) +Indexes: + "at_part_2_a_idx" btree (a) + "at_part_2_b_idx" btree (b) + -- disallow recursive containment of row types create temp table recur1 (f1 int); alter table recur1 add column f2 recur1; -- fails @@ -3276,7 +3337,7 @@ CREATE TABLE unparted ( ); CREATE TABLE fail_part (like unparted); ALTER TABLE unparted ATTACH PARTITION fail_part FOR VALUES IN ('a'); -ERROR: "unparted" is not partitioned +ERROR: table "unparted" is not partitioned DROP TABLE unparted, fail_part; -- check that partition bound is compatible CREATE TABLE list_parted ( @@ -3656,7 +3717,7 @@ DROP TABLE fail_part; -- check that the table is partitioned at all CREATE TABLE regular_table (a int); ALTER TABLE regular_table DETACH PARTITION any_name; -ERROR: "regular_table" is not partitioned +ERROR: table "regular_table" is not partitioned DROP TABLE regular_table; -- check that the partition being detached exists at all ALTER TABLE list_parted2 DETACH PARTITION part_4; diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out new file mode 100644 index 0000000000..4c0f07f74b --- /dev/null +++ b/src/test/regress/expected/indexing.out @@ -0,0 +1,541 @@ +-- Creating an index on a partitioned table makes the partitions +-- automatically get the index +create table idxpart (a int, b int, c text) partition by range (a); +create table idxpart1 partition of idxpart for values from (0) to (10); +create table idxpart2 partition of idxpart for values from (10) to (100) + partition by range (b); +create table idxpart21 partition of idxpart2 for values from (0) to (100); +create index on idxpart (a); +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; + relname | relkind | inhparent +-----------------+---------+---------------- + idxpart | p | + idxpart1 | r | + idxpart1_a_idx | i | idxpart_a_idx + idxpart2 | p | + idxpart21 | r | + idxpart21_a_idx | i | idxpart2_a_idx + idxpart2_a_idx | I | idxpart_a_idx + idxpart_a_idx | I | +(8 rows) + +drop table idxpart; +-- Some unsupported features +create table idxpart (a int, b int, c text) partition by range (a); +create table idxpart1 partition of idxpart for values from (0) to (10); +create unique index on idxpart (a); +ERROR: cannot create unique index on partitioned table "idxpart" +create index concurrently on idxpart (a); +ERROR: cannot create index on partitioned table "idxpart" concurrently +drop table idxpart; +-- If a table without index is attached as partition to a table with +-- an index, the index is automatically created +create table idxpart (a int, b int, c text) partition by range (a); +create index idxparti on idxpart (a); +create index idxparti2 on idxpart (b, c); +create table idxpart1 (like idxpart); +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | + +alter table idxpart attach partition idxpart1 for values from (0) to (10); +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Partition of: idxpart FOR VALUES FROM (0) TO (10) +Indexes: + "idxpart1_a_idx" btree (a) + "idxpart1_b_c_idx" btree (b, c) + +drop table idxpart; +-- If a partition already has an index, don't create a duplicative one +create table idxpart (a int, b int) partition by range (a, b); +create table idxpart1 partition of idxpart for values from (0, 0) to (10, 10); +create index on idxpart1 (a, b); +create index on idxpart (a, b); +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | +Partition of: idxpart FOR VALUES FROM (0, 0) TO (10, 10) +Indexes: + "idxpart1_a_b_idx" btree (a, b) + +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; + relname | relkind | inhparent +------------------+---------+----------------- + idxpart | p | + idxpart1 | r | + idxpart1_a_b_idx | i | idxpart_a_b_idx + idxpart_a_b_idx | I | +(4 rows) + +drop table idxpart; +-- DROP behavior for partitioned indexes +create table idxpart (a int) partition by range (a); +create index on idxpart (a); +create table idxpart1 partition of idxpart for values from (0) to (10); +drop index idxpart1_a_idx; -- no way +ERROR: cannot drop index idxpart1_a_idx because index idxpart_a_idx requires it +HINT: You can drop index idxpart_a_idx instead. +drop index idxpart_a_idx; -- both indexes go away +select relname, relkind from pg_class + where relname like 'idxpart%' order by relname; + relname | relkind +----------+--------- + idxpart | p + idxpart1 | r +(2 rows) + +create index on idxpart (a); +drop table idxpart1; -- the index on partition goes away too +select relname, relkind from pg_class + where relname like 'idxpart%' order by relname; + relname | relkind +---------------+--------- + idxpart | p + idxpart_a_idx | I +(2 rows) + +drop table idxpart; +-- ALTER INDEX .. ATTACH, error cases +create table idxpart (a int, b int) partition by range (a, b); +create table idxpart1 partition of idxpart for values from (0, 0) to (10, 10); +create index idxpart_a_b_idx on only idxpart (a, b); +create index idxpart1_a_b_idx on idxpart1 (a, b); +create index idxpart1_tst1 on idxpart1 (b, a); +create index idxpart1_tst2 on idxpart1 using hash (a); +create index idxpart1_tst3 on idxpart1 (a, b) where a > 10; +alter index idxpart attach partition idxpart1; +ERROR: "idxpart" is not an index +alter index idxpart_a_b_idx attach partition idxpart1; +ERROR: "idxpart1" is not an index +alter index idxpart_a_b_idx attach partition idxpart_a_b_idx; +ERROR: cannot attach index "idxpart_a_b_idx" as a partition of index "idxpart_a_b_idx" +DETAIL: Index "idxpart_a_b_idx" is not on a partition of table "idxpart". +alter index idxpart_a_b_idx attach partition idxpart1_b_idx; +ERROR: relation "idxpart1_b_idx" does not exist +alter index idxpart_a_b_idx attach partition idxpart1_tst1; +ERROR: cannot attach index "idxpart1_tst1" as a partition of index "idxpart_a_b_idx" +DETAIL: The index definitions do not match. +alter index idxpart_a_b_idx attach partition idxpart1_tst2; +ERROR: cannot attach index "idxpart1_tst2" as a partition of index "idxpart_a_b_idx" +DETAIL: The index definitions do not match. +alter index idxpart_a_b_idx attach partition idxpart1_tst3; +ERROR: cannot attach index "idxpart1_tst3" as a partition of index "idxpart_a_b_idx" +DETAIL: The index definitions do not match. +-- OK +alter index idxpart_a_b_idx attach partition idxpart1_a_b_idx; +alter index idxpart_a_b_idx attach partition idxpart1_a_b_idx; -- quiet +-- reject dupe +create index idxpart1_2_a_b on idxpart1 (a, b); +alter index idxpart_a_b_idx attach partition idxpart1_2_a_b; +ERROR: cannot attach index "idxpart1_2_a_b" as a partition of index "idxpart_a_b_idx" +DETAIL: Another index is already attached for partition "idxpart1". +drop table idxpart; +-- make sure everything's gone +select indexrelid::regclass, indrelid::regclass + from pg_index where indexrelid::regclass::text like 'idxpart%'; + indexrelid | indrelid +------------+---------- +(0 rows) + +-- Don't auto-attach incompatible indexes +create table idxpart (a int, b int) partition by range (a); +create table idxpart1 (a int, b int); +create index on idxpart1 using hash (a); +create index on idxpart1 (a) where b > 1; +create index on idxpart1 ((a + 0)); +create index on idxpart1 (a, a); +create index on idxpart (a); +alter table idxpart attach partition idxpart1 for values from (0) to (1000); +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | +Partition of: idxpart FOR VALUES FROM (0) TO (1000) +Indexes: + "idxpart1_a_a1_idx" btree (a, a) + "idxpart1_a_idx" hash (a) + "idxpart1_a_idx1" btree (a) WHERE b > 1 + "idxpart1_a_idx2" btree (a) + "idxpart1_expr_idx" btree ((a + 0)) + +drop table idxpart; +-- If CREATE INDEX ONLY, don't create indexes on partitions; and existing +-- indexes on partitions don't change parent. ALTER INDEX ATTACH can change +-- the parent after the fact. +create table idxpart (a int) partition by range (a); +create table idxpart1 partition of idxpart for values from (0) to (100); +create table idxpart2 partition of idxpart for values from (100) to (1000) + partition by range (a); +create table idxpart21 partition of idxpart2 for values from (100) to (200); +create table idxpart22 partition of idxpart2 for values from (200) to (300); +create index on idxpart22 (a); +create index on only idxpart2 (a); +create index on idxpart (a); +-- Here we expect that idxpart1 and idxpart2 have a new index, but idxpart21 +-- does not; also, idxpart22 is not attached. +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: idxpart FOR VALUES FROM (0) TO (100) +Indexes: + "idxpart1_a_idx" btree (a) + +\d idxpart2 + Table "public.idxpart2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: idxpart FOR VALUES FROM (100) TO (1000) +Partition key: RANGE (a) +Indexes: + "idxpart2_a_idx" btree (a) INVALID +Number of partitions: 2 (Use \d+ to list them.) + +\d idxpart21 + Table "public.idxpart21" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: idxpart2 FOR VALUES FROM (100) TO (200) + +select indexrelid::regclass, indrelid::regclass, inhparent::regclass + from pg_index idx left join pg_inherits inh on (idx.indexrelid = inh.inhrelid) +where indexrelid::regclass::text like 'idxpart%' + order by indrelid::regclass; + indexrelid | indrelid | inhparent +-----------------+-----------+--------------- + idxpart_a_idx | idxpart | + idxpart1_a_idx | idxpart1 | idxpart_a_idx + idxpart2_a_idx | idxpart2 | idxpart_a_idx + idxpart22_a_idx | idxpart22 | +(4 rows) + +alter index idxpart2_a_idx attach partition idxpart22_a_idx; +select indexrelid::regclass, indrelid::regclass, inhparent::regclass + from pg_index idx left join pg_inherits inh on (idx.indexrelid = inh.inhrelid) +where indexrelid::regclass::text like 'idxpart%' + order by indrelid::regclass; + indexrelid | indrelid | inhparent +-----------------+-----------+---------------- + idxpart_a_idx | idxpart | + idxpart1_a_idx | idxpart1 | idxpart_a_idx + idxpart2_a_idx | idxpart2 | idxpart_a_idx + idxpart22_a_idx | idxpart22 | idxpart2_a_idx +(4 rows) + +-- attaching idxpart22 is not enough to set idxpart22_a_idx valid ... +alter index idxpart2_a_idx attach partition idxpart22_a_idx; +\d idxpart2 + Table "public.idxpart2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: idxpart FOR VALUES FROM (100) TO (1000) +Partition key: RANGE (a) +Indexes: + "idxpart2_a_idx" btree (a) INVALID +Number of partitions: 2 (Use \d+ to list them.) + +-- ... but this one is. +create index on idxpart21 (a); +alter index idxpart2_a_idx attach partition idxpart21_a_idx; +\d idxpart2 + Table "public.idxpart2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition of: idxpart FOR VALUES FROM (100) TO (1000) +Partition key: RANGE (a) +Indexes: + "idxpart2_a_idx" btree (a) +Number of partitions: 2 (Use \d+ to list them.) + +drop table idxpart; +-- When a table is attached a partition and it already has an index, a +-- duplicate index should not get created, but rather the index becomes +-- attached to the parent's index. +create table idxpart (a int, b int, c text) partition by range (a); +create index idxparti on idxpart (a); +create index idxparti2 on idxpart (b, c); +create table idxpart1 (like idxpart including indexes); +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Indexes: + "idxpart1_a_idx" btree (a) + "idxpart1_b_c_idx" btree (b, c) + +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; + relname | relkind | inhparent +------------------+---------+----------- + idxpart | p | + idxpart1 | r | + idxpart1_a_idx | i | + idxpart1_b_c_idx | i | + idxparti | I | + idxparti2 | I | +(6 rows) + +alter table idxpart attach partition idxpart1 for values from (0) to (10); +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Partition of: idxpart FOR VALUES FROM (0) TO (10) +Indexes: + "idxpart1_a_idx" btree (a) + "idxpart1_b_c_idx" btree (b, c) + +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; + relname | relkind | inhparent +------------------+---------+----------- + idxpart | p | + idxpart1 | r | + idxpart1_a_idx | i | idxparti + idxpart1_b_c_idx | i | idxparti2 + idxparti | I | + idxparti2 | I | +(6 rows) + +drop table idxpart; +-- verify dependency handling during ALTER TABLE DETACH PARTITION +create table idxpart (a int) partition by range (a); +create table idxpart1 (like idxpart); +create index on idxpart1 (a); +create index on idxpart (a); +create table idxpart2 (like idxpart); +alter table idxpart attach partition idxpart1 for values from (0000) to (1000); +alter table idxpart attach partition idxpart2 for values from (1000) to (2000); +create table idxpart3 partition of idxpart for values from (2000) to (3000); +select relname, relkind from pg_class where relname like 'idxpart%'; + relname | relkind +----------------+--------- + idxpart | p + idxpart1 | r + idxpart1_a_idx | i + idxpart2 | r + idxpart2_a_idx | i + idxpart3 | r + idxpart3_a_idx | i + idxpart_a_idx | I +(8 rows) + +-- a) after detaching partitions, the indexes can be dropped independently +alter table idxpart detach partition idxpart1; +alter table idxpart detach partition idxpart2; +alter table idxpart detach partition idxpart3; +drop index idxpart1_a_idx; +drop index idxpart2_a_idx; +drop index idxpart3_a_idx; +select relname, relkind from pg_class where relname like 'idxpart%'; + relname | relkind +---------------+--------- + idxpart | p + idxpart1 | r + idxpart2 | r + idxpart3 | r + idxpart_a_idx | I +(5 rows) + +drop table idxpart, idxpart1, idxpart2, idxpart3; +select relname, relkind from pg_class where relname like 'idxpart%'; + relname | relkind +---------+--------- +(0 rows) + +create table idxpart (a int) partition by range (a); +create table idxpart1 (like idxpart); +create index on idxpart1 (a); +create index on idxpart (a); +create table idxpart2 (like idxpart); +alter table idxpart attach partition idxpart1 for values from (0000) to (1000); +alter table idxpart attach partition idxpart2 for values from (1000) to (2000); +create table idxpart3 partition of idxpart for values from (2000) to (3000); +-- b) after detaching, dropping the index on parent does not remove the others +select relname, relkind from pg_class where relname like 'idxpart%'; + relname | relkind +----------------+--------- + idxpart | p + idxpart1 | r + idxpart1_a_idx | i + idxpart2 | r + idxpart2_a_idx | i + idxpart3 | r + idxpart3_a_idx | i + idxpart_a_idx | I +(8 rows) + +alter table idxpart detach partition idxpart1; +alter table idxpart detach partition idxpart2; +alter table idxpart detach partition idxpart3; +drop index idxpart_a_idx; +select relname, relkind from pg_class where relname like 'idxpart%'; + relname | relkind +----------------+--------- + idxpart | p + idxpart1 | r + idxpart1_a_idx | i + idxpart2 | r + idxpart2_a_idx | i + idxpart3 | r + idxpart3_a_idx | i +(7 rows) + +drop table idxpart, idxpart1, idxpart2, idxpart3; +select relname, relkind from pg_class where relname like 'idxpart%'; + relname | relkind +---------+--------- +(0 rows) + +-- Make sure the partition columns are mapped correctly +create table idxpart (a int, b int, c text) partition by range (a); +create index idxparti on idxpart (a); +create index idxparti2 on idxpart (c, b); +create table idxpart1 (c text, a int, b int); +alter table idxpart attach partition idxpart1 for values from (0) to (10); +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + c | text | | | + a | integer | | | + b | integer | | | +Partition of: idxpart FOR VALUES FROM (0) TO (10) +Indexes: + "idxpart1_a_idx" btree (a) + "idxpart1_c_b_idx" btree (c, b) + +drop table idxpart; +-- Column number mapping: dropped columns in the partition +create table idxpart1 (drop_1 int, drop_2 int, col_keep int, drop_3 int); +alter table idxpart1 drop column drop_1; +alter table idxpart1 drop column drop_2; +alter table idxpart1 drop column drop_3; +create index on idxpart1 (col_keep); +create table idxpart (col_keep int) partition by range (col_keep); +create index on idxpart (col_keep); +alter table idxpart attach partition idxpart1 for values from (0) to (1000); +\d idxpart + Table "public.idxpart" + Column | Type | Collation | Nullable | Default +----------+---------+-----------+----------+--------- + col_keep | integer | | | +Partition key: RANGE (col_keep) +Indexes: + "idxpart_col_keep_idx" btree (col_keep) +Number of partitions: 1 (Use \d+ to list them.) + +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +----------+---------+-----------+----------+--------- + col_keep | integer | | | +Partition of: idxpart FOR VALUES FROM (0) TO (1000) +Indexes: + "idxpart1_col_keep_idx" btree (col_keep) + +select attrelid::regclass, attname, attnum from pg_attribute + where attrelid::regclass::text like 'idxpart%' and attnum > 0 + order by attrelid::regclass, attnum; + attrelid | attname | attnum +-----------------------+------------------------------+-------- + idxpart1 | ........pg.dropped.1........ | 1 + idxpart1 | ........pg.dropped.2........ | 2 + idxpart1 | col_keep | 3 + idxpart1 | ........pg.dropped.4........ | 4 + idxpart1_col_keep_idx | col_keep | 1 + idxpart | col_keep | 1 + idxpart_col_keep_idx | col_keep | 1 +(7 rows) + +drop table idxpart; +-- Column number mapping: dropped columns in the parent table +create table idxpart(drop_1 int, drop_2 int, col_keep int, drop_3 int) partition by range (col_keep); +alter table idxpart drop column drop_1; +alter table idxpart drop column drop_2; +alter table idxpart drop column drop_3; +create table idxpart1 (col_keep int); +create index on idxpart1 (col_keep); +create index on idxpart (col_keep); +alter table idxpart attach partition idxpart1 for values from (0) to (1000); +\d idxpart + Table "public.idxpart" + Column | Type | Collation | Nullable | Default +----------+---------+-----------+----------+--------- + col_keep | integer | | | +Partition key: RANGE (col_keep) +Indexes: + "idxpart_col_keep_idx" btree (col_keep) +Number of partitions: 1 (Use \d+ to list them.) + +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +----------+---------+-----------+----------+--------- + col_keep | integer | | | +Partition of: idxpart FOR VALUES FROM (0) TO (1000) +Indexes: + "idxpart1_col_keep_idx" btree (col_keep) + +select attrelid::regclass, attname, attnum from pg_attribute + where attrelid::regclass::text like 'idxpart%' and attnum > 0 + order by attrelid::regclass, attnum; + attrelid | attname | attnum +-----------------------+------------------------------+-------- + idxpart | ........pg.dropped.1........ | 1 + idxpart | ........pg.dropped.2........ | 2 + idxpart | col_keep | 3 + idxpart | ........pg.dropped.4........ | 4 + idxpart1 | col_keep | 1 + idxpart1_col_keep_idx | col_keep | 1 + idxpart_col_keep_idx | col_keep | 1 +(7 rows) + +drop table idxpart; +-- intentionally leave some objects around +create table idxpart (a int) partition by range (a); +create table idxpart1 partition of idxpart for values from (0) to (100); +create table idxpart2 partition of idxpart for values from (100) to (1000) + partition by range (a); +create table idxpart21 partition of idxpart2 for values from (100) to (200); +create table idxpart22 partition of idxpart2 for values from (200) to (300); +create index on idxpart22 (a); +create index on only idxpart2 (a); +alter index idxpart2_a_idx attach partition idxpart22_a_idx; +create index on idxpart (a); diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index e224977791..ad9434fb87 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -116,7 +116,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid c # ---------- # Another group of parallel tests # ---------- -test: identity partition_join partition_prune reloptions hash_part +test: identity partition_join partition_prune reloptions hash_part indexing # event triggers cannot run concurrently with any test that runs DDL test: event_trigger diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 9fc5f1a268..27cd49845e 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -184,5 +184,6 @@ test: partition_join test: partition_prune test: reloptions test: hash_part +test: indexing test: event_trigger test: stats diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index 02a33ca7c4..af25ee9e77 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -1330,6 +1330,22 @@ create table tab1 (a int, b text); create table tab2 (x int, y tab1); alter table tab1 alter column b type varchar; -- fails +-- Alter column type that's part of a partitioned index +create table at_partitioned (a int, b text) partition by range (a); +create table at_part_1 partition of at_partitioned for values from (0) to (1000); +insert into at_partitioned values (512, '0.123'); +create table at_part_2 (b text, a int); +insert into at_part_2 values ('1.234', 1024); +create index on at_partitioned (b); +create index on at_partitioned (a); +\d at_part_1 +\d at_part_2 +alter table at_partitioned attach partition at_part_2 for values from (1000) to (2000); +\d at_part_2 +alter table at_partitioned alter column b type numeric using b::numeric; +\d at_part_1 +\d at_part_2 + -- disallow recursive containment of row types create temp table recur1 (f1 int); alter table recur1 add column f2 recur1; -- fails diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql new file mode 100644 index 0000000000..0be1bd7376 --- /dev/null +++ b/src/test/regress/sql/indexing.sql @@ -0,0 +1,244 @@ +-- Creating an index on a partitioned table makes the partitions +-- automatically get the index +create table idxpart (a int, b int, c text) partition by range (a); +create table idxpart1 partition of idxpart for values from (0) to (10); +create table idxpart2 partition of idxpart for values from (10) to (100) + partition by range (b); +create table idxpart21 partition of idxpart2 for values from (0) to (100); +create index on idxpart (a); +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; +drop table idxpart; + +-- Some unsupported features +create table idxpart (a int, b int, c text) partition by range (a); +create table idxpart1 partition of idxpart for values from (0) to (10); +create unique index on idxpart (a); +create index concurrently on idxpart (a); +drop table idxpart; + +-- If a table without index is attached as partition to a table with +-- an index, the index is automatically created +create table idxpart (a int, b int, c text) partition by range (a); +create index idxparti on idxpart (a); +create index idxparti2 on idxpart (b, c); +create table idxpart1 (like idxpart); +\d idxpart1 +alter table idxpart attach partition idxpart1 for values from (0) to (10); +\d idxpart1 +drop table idxpart; + +-- If a partition already has an index, don't create a duplicative one +create table idxpart (a int, b int) partition by range (a, b); +create table idxpart1 partition of idxpart for values from (0, 0) to (10, 10); +create index on idxpart1 (a, b); +create index on idxpart (a, b); +\d idxpart1 +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; +drop table idxpart; + +-- DROP behavior for partitioned indexes +create table idxpart (a int) partition by range (a); +create index on idxpart (a); +create table idxpart1 partition of idxpart for values from (0) to (10); +drop index idxpart1_a_idx; -- no way +drop index idxpart_a_idx; -- both indexes go away +select relname, relkind from pg_class + where relname like 'idxpart%' order by relname; +create index on idxpart (a); +drop table idxpart1; -- the index on partition goes away too +select relname, relkind from pg_class + where relname like 'idxpart%' order by relname; +drop table idxpart; + +-- ALTER INDEX .. ATTACH, error cases +create table idxpart (a int, b int) partition by range (a, b); +create table idxpart1 partition of idxpart for values from (0, 0) to (10, 10); +create index idxpart_a_b_idx on only idxpart (a, b); +create index idxpart1_a_b_idx on idxpart1 (a, b); +create index idxpart1_tst1 on idxpart1 (b, a); +create index idxpart1_tst2 on idxpart1 using hash (a); +create index idxpart1_tst3 on idxpart1 (a, b) where a > 10; + +alter index idxpart attach partition idxpart1; +alter index idxpart_a_b_idx attach partition idxpart1; +alter index idxpart_a_b_idx attach partition idxpart_a_b_idx; +alter index idxpart_a_b_idx attach partition idxpart1_b_idx; +alter index idxpart_a_b_idx attach partition idxpart1_tst1; +alter index idxpart_a_b_idx attach partition idxpart1_tst2; +alter index idxpart_a_b_idx attach partition idxpart1_tst3; +-- OK +alter index idxpart_a_b_idx attach partition idxpart1_a_b_idx; +alter index idxpart_a_b_idx attach partition idxpart1_a_b_idx; -- quiet + +-- reject dupe +create index idxpart1_2_a_b on idxpart1 (a, b); +alter index idxpart_a_b_idx attach partition idxpart1_2_a_b; +drop table idxpart; +-- make sure everything's gone +select indexrelid::regclass, indrelid::regclass + from pg_index where indexrelid::regclass::text like 'idxpart%'; + +-- Don't auto-attach incompatible indexes +create table idxpart (a int, b int) partition by range (a); +create table idxpart1 (a int, b int); +create index on idxpart1 using hash (a); +create index on idxpart1 (a) where b > 1; +create index on idxpart1 ((a + 0)); +create index on idxpart1 (a, a); +create index on idxpart (a); +alter table idxpart attach partition idxpart1 for values from (0) to (1000); +\d idxpart1 +drop table idxpart; + +-- If CREATE INDEX ONLY, don't create indexes on partitions; and existing +-- indexes on partitions don't change parent. ALTER INDEX ATTACH can change +-- the parent after the fact. +create table idxpart (a int) partition by range (a); +create table idxpart1 partition of idxpart for values from (0) to (100); +create table idxpart2 partition of idxpart for values from (100) to (1000) + partition by range (a); +create table idxpart21 partition of idxpart2 for values from (100) to (200); +create table idxpart22 partition of idxpart2 for values from (200) to (300); +create index on idxpart22 (a); +create index on only idxpart2 (a); +create index on idxpart (a); +-- Here we expect that idxpart1 and idxpart2 have a new index, but idxpart21 +-- does not; also, idxpart22 is not attached. +\d idxpart1 +\d idxpart2 +\d idxpart21 +select indexrelid::regclass, indrelid::regclass, inhparent::regclass + from pg_index idx left join pg_inherits inh on (idx.indexrelid = inh.inhrelid) +where indexrelid::regclass::text like 'idxpart%' + order by indrelid::regclass; +alter index idxpart2_a_idx attach partition idxpart22_a_idx; +select indexrelid::regclass, indrelid::regclass, inhparent::regclass + from pg_index idx left join pg_inherits inh on (idx.indexrelid = inh.inhrelid) +where indexrelid::regclass::text like 'idxpart%' + order by indrelid::regclass; +-- attaching idxpart22 is not enough to set idxpart22_a_idx valid ... +alter index idxpart2_a_idx attach partition idxpart22_a_idx; +\d idxpart2 +-- ... but this one is. +create index on idxpart21 (a); +alter index idxpart2_a_idx attach partition idxpart21_a_idx; +\d idxpart2 +drop table idxpart; + +-- When a table is attached a partition and it already has an index, a +-- duplicate index should not get created, but rather the index becomes +-- attached to the parent's index. +create table idxpart (a int, b int, c text) partition by range (a); +create index idxparti on idxpart (a); +create index idxparti2 on idxpart (b, c); +create table idxpart1 (like idxpart including indexes); +\d idxpart1 +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; +alter table idxpart attach partition idxpart1 for values from (0) to (10); +\d idxpart1 +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; +drop table idxpart; + +-- verify dependency handling during ALTER TABLE DETACH PARTITION +create table idxpart (a int) partition by range (a); +create table idxpart1 (like idxpart); +create index on idxpart1 (a); +create index on idxpart (a); +create table idxpart2 (like idxpart); +alter table idxpart attach partition idxpart1 for values from (0000) to (1000); +alter table idxpart attach partition idxpart2 for values from (1000) to (2000); +create table idxpart3 partition of idxpart for values from (2000) to (3000); +select relname, relkind from pg_class where relname like 'idxpart%'; +-- a) after detaching partitions, the indexes can be dropped independently +alter table idxpart detach partition idxpart1; +alter table idxpart detach partition idxpart2; +alter table idxpart detach partition idxpart3; +drop index idxpart1_a_idx; +drop index idxpart2_a_idx; +drop index idxpart3_a_idx; +select relname, relkind from pg_class where relname like 'idxpart%'; +drop table idxpart, idxpart1, idxpart2, idxpart3; +select relname, relkind from pg_class where relname like 'idxpart%'; + +create table idxpart (a int) partition by range (a); +create table idxpart1 (like idxpart); +create index on idxpart1 (a); +create index on idxpart (a); +create table idxpart2 (like idxpart); +alter table idxpart attach partition idxpart1 for values from (0000) to (1000); +alter table idxpart attach partition idxpart2 for values from (1000) to (2000); +create table idxpart3 partition of idxpart for values from (2000) to (3000); +-- b) after detaching, dropping the index on parent does not remove the others +select relname, relkind from pg_class where relname like 'idxpart%'; +alter table idxpart detach partition idxpart1; +alter table idxpart detach partition idxpart2; +alter table idxpart detach partition idxpart3; +drop index idxpart_a_idx; +select relname, relkind from pg_class where relname like 'idxpart%'; +drop table idxpart, idxpart1, idxpart2, idxpart3; +select relname, relkind from pg_class where relname like 'idxpart%'; + +-- Make sure the partition columns are mapped correctly +create table idxpart (a int, b int, c text) partition by range (a); +create index idxparti on idxpart (a); +create index idxparti2 on idxpart (c, b); +create table idxpart1 (c text, a int, b int); +alter table idxpart attach partition idxpart1 for values from (0) to (10); +\d idxpart1 +drop table idxpart; + +-- Column number mapping: dropped columns in the partition +create table idxpart1 (drop_1 int, drop_2 int, col_keep int, drop_3 int); +alter table idxpart1 drop column drop_1; +alter table idxpart1 drop column drop_2; +alter table idxpart1 drop column drop_3; +create index on idxpart1 (col_keep); +create table idxpart (col_keep int) partition by range (col_keep); +create index on idxpart (col_keep); +alter table idxpart attach partition idxpart1 for values from (0) to (1000); +\d idxpart +\d idxpart1 +select attrelid::regclass, attname, attnum from pg_attribute + where attrelid::regclass::text like 'idxpart%' and attnum > 0 + order by attrelid::regclass, attnum; +drop table idxpart; + +-- Column number mapping: dropped columns in the parent table +create table idxpart(drop_1 int, drop_2 int, col_keep int, drop_3 int) partition by range (col_keep); +alter table idxpart drop column drop_1; +alter table idxpart drop column drop_2; +alter table idxpart drop column drop_3; +create table idxpart1 (col_keep int); +create index on idxpart1 (col_keep); +create index on idxpart (col_keep); +alter table idxpart attach partition idxpart1 for values from (0) to (1000); +\d idxpart +\d idxpart1 +select attrelid::regclass, attname, attnum from pg_attribute + where attrelid::regclass::text like 'idxpart%' and attnum > 0 + order by attrelid::regclass, attnum; +drop table idxpart; + +-- intentionally leave some objects around +create table idxpart (a int) partition by range (a); +create table idxpart1 partition of idxpart for values from (0) to (100); +create table idxpart2 partition of idxpart for values from (100) to (1000) + partition by range (a); +create table idxpart21 partition of idxpart2 for values from (100) to (200); +create table idxpart22 partition of idxpart2 for values from (200) to (300); +create index on idxpart22 (a); +create index on only idxpart2 (a); +alter index idxpart2_a_idx attach partition idxpart22_a_idx; +create index on idxpart (a); -- 2.11.0