Index: src/backend/commands/cluster.c =================================================================== RCS file: /projects/cvsroot/pgsql-server/src/backend/commands/cluster.c,v retrieving revision 1.83 diff -c -r1.83 cluster.c *** src/backend/commands/cluster.c 2002/07/12 18:43:15 1.83 --- src/backend/commands/cluster.c 2002/08/03 20:44:43 *************** *** 27,71 **** #include "catalog/dependency.h" #include "catalog/heap.h" #include "catalog/index.h" #include "catalog/pg_index.h" #include "catalog/pg_proc.h" #include "commands/cluster.h" #include "commands/tablecmds.h" #include "miscadmin.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/syscache.h" static Oid copy_heap(Oid OIDOldHeap, const char *NewName); - static Oid copy_index(Oid OIDOldIndex, Oid OIDNewHeap, - const char *NewIndexName); static void rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex); /* * cluster * ! * STILL TO DO: ! * Create a list of all the other indexes on this relation. Because ! * the cluster will wreck all the tids, I'll need to destroy bogus ! * indexes. The user will have to re-create them. Not nice, but ! * I'm not a nice guy. The alternative is to try some kind of post ! * destroy re-build. This may be possible. I'll check out what the ! * index create functiond want in the way of paramaters. On the other ! * hand, re-creating n indexes may blow out the space. */ void cluster(RangeVar *oldrelation, char *oldindexname) { Oid OIDOldHeap, OIDOldIndex, ! OIDNewHeap, ! OIDNewIndex; Relation OldHeap, OldIndex; char NewHeapName[NAMEDATALEN]; - char NewIndexName[NAMEDATALEN]; ObjectAddress object; /* * We grab exclusive access to the target rel and index for the --- 27,100 ---- #include "catalog/dependency.h" #include "catalog/heap.h" #include "catalog/index.h" + #include "catalog/indexing.h" + #include "catalog/catname.h" #include "catalog/pg_index.h" #include "catalog/pg_proc.h" #include "commands/cluster.h" #include "commands/tablecmds.h" #include "miscadmin.h" #include "utils/builtins.h" + #include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/syscache.h" + #include "utils/relcache.h" + /* + * We need one of these structs for each index in the relation to be + * clustered. It's basically the data needed by index_create() so + * we can recreate the indexes after destroying the old heap. + */ + typedef struct + { + char *indexName; + IndexInfo *indexInfo; + Oid accessMethodOID; + Oid *classOID; + Oid indexOID; + bool isPrimary; + } IndexAttrs; static Oid copy_heap(Oid OIDOldHeap, const char *NewName); static void rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex); + static List *get_indexattr_list (Oid OIDOldHeap); + static void recreate_indexattr(Oid OIDOldHeap, List *indexes); + static void swap_relfilenodes(Oid r1, Oid r2); + Relation RelationIdGetRelation(Oid relationId); + /* * cluster + * + * This clusters the table by creating a new, clustered table and + * swapping the relfilenodes of the new table and the old table, so + * the OID of the original table is preserved. Thus we do not lose + * GRANT, inheritance nor references to this table (this was a bug + * in releases thru 7.3) + * + * Also create new indexes and swap the filenodes with the old indexes + * the same way we do for the relation. + * + * TODO: + * maybe we can get away with AccessShareLock for the table. + * Concurrency would be much improved. Only acquire + * AccessExclusiveLock right before swapping the filenodes. + * This would allow users to CLUSTER on a regular basis, + * practically eliminating the need for auto-clustered indexes. * ! * Preserve constraint bit for the indexes. */ void cluster(RangeVar *oldrelation, char *oldindexname) { Oid OIDOldHeap, OIDOldIndex, ! OIDNewHeap; Relation OldHeap, OldIndex; char NewHeapName[NAMEDATALEN]; ObjectAddress object; + List *indexes; /* * We grab exclusive access to the target rel and index for the *************** *** 96,101 **** --- 125,133 ---- heap_close(OldHeap, NoLock); index_close(OldIndex); + /* Save the information of all indexes on the relation. */ + indexes = get_indexattr_list(OIDOldHeap); + /* * Create the new heap with a temporary name. */ *************** *** 112,141 **** /* To make the new heap's data visible. */ CommandCounterIncrement(); - - /* Create new index over the tuples of the new heap. */ - snprintf(NewIndexName, NAMEDATALEN, "temp_%u", OIDOldIndex); ! OIDNewIndex = copy_index(OIDOldIndex, OIDNewHeap, NewIndexName); CommandCounterIncrement(); ! /* Destroy old heap (along with its index) and rename new. */ object.classId = RelOid_pg_class; ! object.objectId = OIDOldHeap; object.objectSubId = 0; ! /* XXX better to use DROP_CASCADE here? */ performDeletion(&object, DROP_RESTRICT); /* performDeletion does CommandCounterIncrement at end */ - - renamerel(OIDNewHeap, oldrelation->relname); ! /* This one might be unnecessary, but let's be safe. */ ! CommandCounterIncrement(); ! ! renamerel(OIDNewIndex, oldindexname); } static Oid --- 144,171 ---- /* To make the new heap's data visible. */ CommandCounterIncrement(); ! /* Swap the relfilenodes of the old and new heaps. */ ! swap_relfilenodes(OIDNewHeap, OIDOldHeap); CommandCounterIncrement(); ! /* Destroy new heap with old filenode */ object.classId = RelOid_pg_class; ! object.objectId = OIDNewHeap; object.objectSubId = 0; ! /* The relation is local to our transaction and we know nothin ! * depends on it, so DROP_RESTRICT should be OK. ! */ performDeletion(&object, DROP_RESTRICT); /* performDeletion does CommandCounterIncrement at end */ ! /* Recreate the indexes on the relation. We do not need ! * CommandCounterIncrement() because recreate_indexattr does it. ! */ ! recreate_indexattr(OIDOldHeap, indexes); } static Oid *************** *** 181,223 **** return OIDNewHeap; } - static Oid - copy_index(Oid OIDOldIndex, Oid OIDNewHeap, const char *NewIndexName) - { - Oid OIDNewIndex; - Relation OldIndex, - NewHeap; - IndexInfo *indexInfo; - - NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock); - OldIndex = index_open(OIDOldIndex); - - /* - * Create a new index like the old one. To do this I get the info - * from pg_index, and add a new index with a temporary name (that will - * be changed later). - */ - indexInfo = BuildIndexInfo(OldIndex->rd_index); - - OIDNewIndex = index_create(OIDNewHeap, - NewIndexName, - indexInfo, - OldIndex->rd_rel->relam, - OldIndex->rd_index->indclass, - OldIndex->rd_index->indisprimary, - false, /* XXX losing constraint status */ - allowSystemTableMods); - - setRelhasindex(OIDNewHeap, true, - OldIndex->rd_index->indisprimary, InvalidOid); - - index_close(OldIndex); - heap_close(NewHeap, NoLock); - - return OIDNewIndex; - } - - static void rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex) { --- 211,216 ---- *************** *** 260,263 **** --- 253,441 ---- index_close(LocalOldIndex); heap_close(LocalOldHeap, NoLock); heap_close(LocalNewHeap, NoLock); + } + + /* Get the necessary info about the indexes in the relation and + * return a List of IndexAttrs. + */ + List * + get_indexattr_list (Oid OIDOldHeap) + { + ScanKeyData entry; + HeapScanDesc scan; + Relation indexRelation; + HeapTuple indexTuple; + List *indexes = NIL; + IndexAttrs *attrs; + HeapTuple tuple; + Form_pg_index index; + + /* Grab the index tuples by looking into RelationRelationName + * by the OID of the old heap. + */ + indexRelation = heap_openr(IndexRelationName, AccessShareLock); + ScanKeyEntryInitialize(&entry, 0, Anum_pg_index_indrelid, + F_OIDEQ, ObjectIdGetDatum(OIDOldHeap)); + scan = heap_beginscan(indexRelation, SnapshotNow, 1, &entry); + while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + { + index = (Form_pg_index) GETSTRUCT(indexTuple); + + attrs = (IndexAttrs *) palloc(sizeof(IndexAttrs)); + attrs->indexInfo = BuildIndexInfo(index); + attrs->isPrimary = index->indisprimary; + attrs->indexOID = index->indexrelid; + + /* The opclasses are copied verbatim from the original indexes. + */ + attrs->classOID = (Oid *)palloc(sizeof(Oid) * + attrs->indexInfo->ii_NumIndexAttrs); + memcpy(attrs->classOID, index->indclass, + sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs); + + /* Name and access method of each index come from + * RelationRelationName. + */ + tuple = SearchSysCache(RELOID, + ObjectIdGetDatum(attrs->indexOID), + 0, 0, 0); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "CLUSTER: cannot find index %u", attrs->indexOID); + attrs->indexName = pstrdup(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname)); + attrs->accessMethodOID = ((Form_pg_class) GETSTRUCT(tuple))->relam; + ReleaseSysCache(tuple); + + /* Cons the gathered data into the list. We do not care about + * ordering, and this is more efficient than append. + */ + indexes=lcons((void *)attrs, indexes); + } + heap_endscan(scan); + heap_close(indexRelation, AccessShareLock); + return indexes; + } + + /* Create new indexes and swap the filenodes with old indexes. Then drop + * the new index (carrying the old heap along). + */ + void + recreate_indexattr(Oid OIDOldHeap, List *indexes) + { + IndexAttrs *attrs; + List *elem; + Oid newIndexOID; + char newIndexName[NAMEDATALEN]; + ObjectAddress object; + + foreach (elem, indexes) + { + attrs=(IndexAttrs *) lfirst(elem); + + /* Create the new index under a temporary name */ + snprintf(newIndexName, NAMEDATALEN, "temp_%u", attrs->indexOID); + + /* The new index will have constraint status set to false, + * but since we will only use its filenode it doesn't matter: + * after the filenode swap the index will keep the constraint + * status of the old index. + */ + newIndexOID = index_create(OIDOldHeap, newIndexName, + attrs->indexInfo, attrs->accessMethodOID, + attrs->classOID, attrs->isPrimary, + false, allowSystemTableMods); + CommandCounterIncrement(); + + /* Swap the filenodes. */ + swap_relfilenodes(newIndexOID, attrs->indexOID); + setRelhasindex(OIDOldHeap, true, attrs->isPrimary, InvalidOid); + + /* I'm not sure this one is needed, but let's be safe. */ + CommandCounterIncrement(); + + /* Destroy new index with old filenode */ + object.classId = RelOid_pg_class; + object.objectId = newIndexOID; + object.objectSubId = 0; + + /* The relation is local to our transaction and we know + * nothing depends on it, so DROP_RESTRICT should be OK. + */ + performDeletion(&object, DROP_RESTRICT); + + /* performDeletion does CommandCounterIncrement() at its end */ + + pfree(attrs->classOID); + pfree(attrs); + } + freeList(indexes); + } + + /* Swap the relfilenodes for two given relations. + */ + void + swap_relfilenodes(Oid r1, Oid r2) + { + /* I can probably keep RelationRelationName open in the main + * function and pass the Relation around so I don't have to open + * it every time. + */ + Relation relRelation, + irels[Num_pg_class_indices], + rel; + HeapTuple reltup[2]; + Oid tempRFNode; + int i; + + /* We need both RelationRelationName tuples. */ + relRelation = heap_openr(RelationRelationName, RowExclusiveLock); + + reltup[0] = SearchSysCacheCopy(RELOID, + ObjectIdGetDatum(r1), + 0, 0, 0); + if (!HeapTupleIsValid(reltup[0])) + elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r1); + reltup[1] = SearchSysCacheCopy(RELOID, + ObjectIdGetDatum(r2), + 0, 0, 0); + if (!HeapTupleIsValid(reltup[1])) + elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r2); + + /* The buffer manager gets confused if we swap relfilenodes for + * relations that are not both local or non-local to this transaction. + * Flush the buffers on both relations so the buffer manager can + * forget about'em. + */ + + rel = RelationIdGetRelation(r1); + i = FlushRelationBuffers(rel, 0); + if (i < 0) + elog(ERROR, "CLUSTER: FlushRelationBuffers returned %d", i); + RelationClose(rel); + rel = RelationIdGetRelation(r1); + i = FlushRelationBuffers(rel, 0); + if (i < 0) + elog(ERROR, "CLUSTER: FlushRelationBuffers returned %d", i); + RelationClose(rel); + + /* Actually swap the filenodes */ + + tempRFNode = ((Form_pg_class) GETSTRUCT(reltup[0]))->relfilenode; + ((Form_pg_class) GETSTRUCT(reltup[0]))->relfilenode = + ((Form_pg_class) GETSTRUCT(reltup[1]))->relfilenode; + ((Form_pg_class) GETSTRUCT(reltup[1]))->relfilenode = tempRFNode; + + /* Update the RelationRelationName tuples */ + simple_heap_update(relRelation, &reltup[1]->t_self, reltup[1]); + simple_heap_update(relRelation, &reltup[0]->t_self, reltup[0]); + + /* To keep system catalogs current. */ + CatalogOpenIndices(Num_pg_class_indices, Name_pg_class_indices, irels); + CatalogIndexInsert(irels, Num_pg_class_indices, relRelation, reltup[1]); + CatalogIndexInsert(irels, Num_pg_class_indices, relRelation, reltup[0]); + CatalogCloseIndices(Num_pg_class_indices, irels); + CommandCounterIncrement(); + + heap_close(relRelation, NoLock); + heap_freetuple(reltup[0]); + heap_freetuple(reltup[1]); } Index: doc/src/sgml/ref/cluster.sgml =================================================================== RCS file: /projects/cvsroot/pgsql-server/doc/src/sgml/ref/cluster.sgml,v retrieving revision 1.16 diff -c -r1.16 cluster.sgml *** doc/src/sgml/ref/cluster.sgml 2002/04/23 02:07:15 1.16 --- doc/src/sgml/ref/cluster.sgml 2002/08/03 20:44:44 *************** *** 75,93 **** - ERROR: relation <tablerelation_number> inherits "table" - - - - - This is not documented anywhere. It seems not to be possible to - cluster a table that is inherited. - - - - - - ERROR: Relation table does not exist! --- 75,80 ---- *************** *** 139,151 **** - The table is actually copied to a temporary table in index - order, then renamed back to the original name. For this - reason, all grant permissions and other indexes are lost - when clustering is performed. - - - In cases where you are accessing single rows randomly within a table, the actual order of the data in the heap table is unimportant. However, if you tend to access some --- 126,131 ---- *************** *** 194,199 **** --- 174,193 ---- fast because most of the heap data has already been ordered, and the existing index is used. + + + During the cluster operation, a temporal table is created that contains + the table in the index order. Due to this, you need to have free space + on disk at least the size of the table itself, or the biggest index if + you have one that is larger than the table. + + + + As opposed to previous releases, CLUSTER does not lose GRANT, + inheritance or foreign key information, and preserves indexes + other than the one being used for the CLUSTER. + +