*** src/backend/catalog/heap.c.orig Tue Aug 31 13:10:36 2004 --- src/backend/catalog/heap.c Mon Nov 29 18:45:34 2004 *************** *** 607,643 **** */ new_rel_reltup = new_rel_desc->rd_rel; - /* - * Here we insert bogus estimates of the size of the new relation. In - * reality, of course, the new relation has 0 tuples and pages, and if - * we were tracking these statistics accurately then we'd set the - * fields that way. But at present the stats will be updated only by - * VACUUM or CREATE INDEX, and the user might insert a lot of tuples - * before he gets around to doing either of those. So, instead of - * saying the relation is empty, we insert guesstimates. The point is - * to keep the optimizer from making really stupid choices on - * never-yet-vacuumed tables; so the estimates need only be large - * enough to discourage the optimizer from using nested-loop plans. - * With this hack, nested-loop plans will be preferred only after the - * table has been proven to be small by VACUUM or CREATE INDEX. - * Maintaining the stats on-the-fly would solve the problem more - * cleanly, but the overhead of that would likely cost more than it'd - * save. (NOTE: CREATE INDEX inserts the same bogus estimates if it - * finds the relation has 0 rows and pages. See index.c.) - */ switch (relkind) { case RELKIND_RELATION: case RELKIND_INDEX: case RELKIND_TOASTVALUE: ! new_rel_reltup->relpages = 10; /* bogus estimates */ ! new_rel_reltup->reltuples = 1000; break; case RELKIND_SEQUENCE: new_rel_reltup->relpages = 1; new_rel_reltup->reltuples = 1; break; ! default: /* views, etc */ new_rel_reltup->relpages = 0; new_rel_reltup->reltuples = 0; break; --- 607,628 ---- */ new_rel_reltup = new_rel_desc->rd_rel; switch (relkind) { case RELKIND_RELATION: case RELKIND_INDEX: case RELKIND_TOASTVALUE: ! /* The relation is real, but as yet empty */ ! new_rel_reltup->relpages = 0; ! new_rel_reltup->reltuples = 0; break; case RELKIND_SEQUENCE: + /* Sequences always have a known size */ new_rel_reltup->relpages = 1; new_rel_reltup->reltuples = 1; break; ! default: ! /* Views, etc, have no disk storage */ new_rel_reltup->relpages = 0; new_rel_reltup->reltuples = 0; break; *** src/backend/catalog/index.c.orig Fri Oct 15 18:39:53 2004 --- src/backend/catalog/index.c Mon Nov 29 18:46:51 2004 *************** *** 1170,1176 **** * * Update pg_class' relpages and reltuples statistics for the given relation * (which can be either a table or an index). Note that this is not used ! * in the context of VACUUM. * ---------------- */ void --- 1170,1176 ---- * * Update pg_class' relpages and reltuples statistics for the given relation * (which can be either a table or an index). Note that this is not used ! * in the context of VACUUM, only CREATE INDEX. * ---------------- */ void *************** *** 1209,1215 **** * Find the tuple to update in pg_class. Normally we make a copy of * the tuple using the syscache, modify it, and apply heap_update. But * in bootstrap mode we can't use heap_update, so we cheat and ! * overwrite the tuple in-place. * * We also must cheat if reindexing pg_class itself, because the target * index may presently not be part of the set of indexes that --- 1209,1216 ---- * Find the tuple to update in pg_class. Normally we make a copy of * the tuple using the syscache, modify it, and apply heap_update. But * in bootstrap mode we can't use heap_update, so we cheat and ! * overwrite the tuple in-place. (Note: as of PG 8.0 this isn't called ! * during bootstrap, but leave the code here for possible future use.) * * We also must cheat if reindexing pg_class itself, because the target * index may presently not be part of the set of indexes that *************** *** 1248,1281 **** /* * Figure values to insert. * ! * If we found zero tuples in the scan, do NOT believe it; instead put a ! * bogus estimate into the statistics fields. Otherwise, the common ! * pattern "CREATE TABLE; CREATE INDEX; insert data" leaves the table ! * with zero size statistics until a VACUUM is done. The optimizer ! * will generate very bad plans if the stats claim the table is empty ! * when it is actually sizable. See also CREATE TABLE in heap.c. ! * ! * Note: this path is also taken during bootstrap, because bootstrap.c ! * passes reltuples = 0 after loading a table. We have to estimate ! * some number for reltuples based on the actual number of pages. */ relpages = RelationGetNumberOfBlocks(whichRel); ! if (reltuples == 0) { ! if (relpages == 0) ! { ! /* Bogus defaults for a virgin table, same as heap.c */ ! reltuples = 1000; ! relpages = 10; ! } ! else if (whichRel->rd_rel->relkind == RELKIND_INDEX && relpages <= 2) ! { ! /* Empty index, leave bogus defaults in place */ ! reltuples = 1000; ! } ! else ! reltuples = ((double) relpages) * NTUPLES_PER_PAGE(whichRel->rd_rel->relnatts); } /* --- 1249,1275 ---- /* * Figure values to insert. * ! * If the relation has been truncated to zero tuples/pages, do not ! * simply insert 0/0 into pg_class; instead attempt to preserve the ! * previous tuple density information. plancat.c will interpret ! * nonzero reltuples with zero relpages as a direct density value, ! * so we can truthfully zero out relpages while preserving the old ! * density value. This gives a better result when a table is truncated ! * and refilled than just falling back to a default density estimate. */ relpages = RelationGetNumberOfBlocks(whichRel); ! if (reltuples == 0 && ! (relpages == 0 || ! (whichRel->rd_rel->relkind == RELKIND_INDEX && relpages <= 2))) { ! /* extract old density estimate */ ! reltuples = rd_rel->reltuples; ! if (rd_rel->relpages != 0) ! reltuples /= ((BlockNumber) rd_rel->relpages); ! /* re-scale in case it's an index */ ! if (relpages > 1) ! reltuples *= relpages; } /* *** src/backend/commands/vacuum.c.orig Fri Oct 15 18:39:56 2004 --- src/backend/commands/vacuum.c Mon Nov 29 18:47:11 2004 *************** *** 646,652 **** /* overwrite the existing statistics in the tuple */ pgcform = (Form_pg_class) GETSTRUCT(&rtup); pgcform->relpages = (int32) num_pages; ! pgcform->reltuples = num_tuples; pgcform->relhasindex = hasindex; /* --- 646,652 ---- /* overwrite the existing statistics in the tuple */ pgcform = (Form_pg_class) GETSTRUCT(&rtup); pgcform->relpages = (int32) num_pages; ! pgcform->reltuples = (float4) num_tuples; pgcform->relhasindex = hasindex; /* *** src/backend/commands/vacuumlazy.c.orig Mon Oct 25 11:42:02 2004 --- src/backend/commands/vacuumlazy.c Mon Nov 29 18:47:12 2004 *************** *** 37,42 **** --- 37,44 ---- */ #include "postgres.h" + #include + #include "access/genam.h" #include "access/heapam.h" #include "access/xlog.h" *************** *** 67,72 **** --- 69,75 ---- /* Overall statistics about rel */ BlockNumber rel_pages; double rel_tuples; + double tuples_deleted; BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */ Size threshold; /* minimum interesting free space */ /* List of TIDs of tuples we intend to delete */ *************** *** 94,105 **** Relation *Irel, int nindexes); static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats); static void lazy_scan_index(Relation indrel, LVRelStats *vacrelstats); ! static void lazy_vacuum_index(Relation indrel, LVRelStats *vacrelstats); static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, int tupindex, LVRelStats *vacrelstats); static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats); static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats); static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks); static void lazy_record_dead_tuple(LVRelStats *vacrelstats, ItemPointer itemptr); --- 97,112 ---- Relation *Irel, int nindexes); static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats); static void lazy_scan_index(Relation indrel, LVRelStats *vacrelstats); ! static void lazy_vacuum_index(Relation indrel, double *index_tups_vacuumed, ! LVRelStats *vacrelstats); static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, int tupindex, LVRelStats *vacrelstats); static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats); static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats); + static void lazy_update_relstats(Relation rel, BlockNumber num_pages, + double num_tuples, double tuples_removed, + bool hasindex); static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks); static void lazy_record_dead_tuple(LVRelStats *vacrelstats, ItemPointer itemptr); *************** *** 169,176 **** lazy_update_fsm(onerel, vacrelstats); /* Update statistics in pg_class */ ! vac_update_relstats(RelationGetRelid(onerel), vacrelstats->rel_pages, ! vacrelstats->rel_tuples, hasindex); } --- 176,184 ---- lazy_update_fsm(onerel, vacrelstats); /* Update statistics in pg_class */ ! lazy_update_relstats(onerel, vacrelstats->rel_pages, ! vacrelstats->rel_tuples, vacrelstats->tuples_deleted, ! hasindex); } *************** *** 195,200 **** --- 203,210 ---- tups_vacuumed, nkeep, nunused; + double *index_tups_vacuumed; + bool did_vacuum_index = false; int i; VacRUsage ru0; *************** *** 209,214 **** --- 219,233 ---- empty_pages = 0; num_tuples = tups_vacuumed = nkeep = nunused = 0; + /* + * Because index vacuuming is done in multiple passes, we have to + * keep track of the total number of rows removed from each index. + * index_tups_vacuumed[i] is the number removed so far from the i'th + * index. (For partial indexes this could well be different from + * tups_vacuumed.) + */ + index_tups_vacuumed = (double *) palloc0(nindexes * sizeof(double)); + nblocks = RelationGetNumberOfBlocks(onerel); vacrelstats->rel_pages = nblocks; vacrelstats->nonempty_pages = 0; *************** *** 238,244 **** { /* Remove index entries */ for (i = 0; i < nindexes; i++) ! lazy_vacuum_index(Irel[i], vacrelstats); /* Remove tuples from heap */ lazy_vacuum_heap(onerel, vacrelstats); /* Forget the now-vacuumed tuples, and press on */ --- 257,265 ---- { /* Remove index entries */ for (i = 0; i < nindexes; i++) ! lazy_vacuum_index(Irel[i], &index_tups_vacuumed[i], ! vacrelstats); ! did_vacuum_index = true; /* Remove tuples from heap */ lazy_vacuum_heap(onerel, vacrelstats); /* Forget the now-vacuumed tuples, and press on */ *************** *** 400,405 **** --- 421,427 ---- /* save stats for use later */ vacrelstats->rel_tuples = num_tuples; + vacrelstats->tuples_deleted = tups_vacuumed; /* If any tuples need to be deleted, perform final vacuum cycle */ /* XXX put a threshold on min number of tuples here? */ *************** *** 407,417 **** { /* Remove index entries */ for (i = 0; i < nindexes; i++) ! lazy_vacuum_index(Irel[i], vacrelstats); /* Remove tuples from heap */ lazy_vacuum_heap(onerel, vacrelstats); } ! else { /* Must do post-vacuum cleanup and statistics update anyway */ for (i = 0; i < nindexes; i++) --- 429,440 ---- { /* Remove index entries */ for (i = 0; i < nindexes; i++) ! lazy_vacuum_index(Irel[i], &index_tups_vacuumed[i], ! vacrelstats); /* Remove tuples from heap */ lazy_vacuum_heap(onerel, vacrelstats); } ! else if (!did_vacuum_index) { /* Must do post-vacuum cleanup and statistics update anyway */ for (i = 0; i < nindexes; i++) *************** *** 588,596 **** return; /* now update statistics in pg_class */ ! vac_update_relstats(RelationGetRelid(indrel), ! stats->num_pages, stats->num_index_tuples, ! false); ereport(elevel, (errmsg("index \"%s\" now contains %.0f row versions in %u pages", --- 611,619 ---- return; /* now update statistics in pg_class */ ! lazy_update_relstats(indrel, stats->num_pages, ! stats->num_index_tuples, stats->tuples_removed, ! false); ereport(elevel, (errmsg("index \"%s\" now contains %.0f row versions in %u pages", *************** *** 611,621 **** * Delete all the index entries pointing to tuples listed in * vacrelstats->dead_tuples. * * Finally, we arrange to update the index relation's statistics in * pg_class. */ static void ! lazy_vacuum_index(Relation indrel, LVRelStats *vacrelstats) { IndexBulkDeleteResult *stats; IndexVacuumCleanupInfo vcinfo; --- 634,648 ---- * Delete all the index entries pointing to tuples listed in * vacrelstats->dead_tuples. * + * Increment *index_tups_vacuumed by the number of index entries + * removed. + * * Finally, we arrange to update the index relation's statistics in * pg_class. */ static void ! lazy_vacuum_index(Relation indrel, double *index_tups_vacuumed, ! LVRelStats *vacrelstats) { IndexBulkDeleteResult *stats; IndexVacuumCleanupInfo vcinfo; *************** *** 652,661 **** if (!stats) return; /* now update statistics in pg_class */ ! vac_update_relstats(RelationGetRelid(indrel), ! stats->num_pages, stats->num_index_tuples, ! false); ereport(elevel, (errmsg("index \"%s\" now contains %.0f row versions in %u pages", --- 679,691 ---- if (!stats) return; + /* accumulate total removed over multiple index-cleaning cycles */ + *index_tups_vacuumed += stats->tuples_removed; + /* now update statistics in pg_class */ ! lazy_update_relstats(indrel, stats->num_pages, ! stats->num_index_tuples, *index_tups_vacuumed, ! false); ereport(elevel, (errmsg("index \"%s\" now contains %.0f row versions in %u pages", *************** *** 884,889 **** --- 914,942 ---- * known-nonempty page. */ return vacrelstats->nonempty_pages; + } + + /* + * lazy_update_relstats - update pg_class statistics for a table or index + * + * We always want to set relpages to an accurate value. However, for lazy + * VACUUM it seems best to set reltuples to the average of the number of + * rows before vacuuming and the number after vacuuming, rather than just + * using the number after vacuuming. This will result in the best average + * performance in a steady-state situation where VACUUMs are performed + * regularly on a table of roughly constant size, assuming that the physical + * number of pages in the table stays about the same throughout. (Note that + * we do not apply the same logic to VACUUM FULL, because it repacks the table + * and thereby boosts the tuple density.) + */ + static void + lazy_update_relstats(Relation rel, BlockNumber num_pages, + double num_tuples, double tuples_removed, + bool hasindex) + { + num_tuples = ceil(num_tuples + tuples_removed * 0.5); + vac_update_relstats(RelationGetRelid(rel), num_pages, num_tuples, + hasindex); } /* *** src/backend/optimizer/util/plancat.c.orig Fri Oct 1 13:11:50 2004 --- src/backend/optimizer/util/plancat.c Mon Nov 29 18:47:54 2004 *************** *** 38,43 **** --- 38,47 ---- #include "miscadmin.h" + static void estimate_rel_size(Relation rel, + BlockNumber *pages, double *tuples); + + /* * get_relation_info - * Retrieves catalog information for a given relation. *************** *** 121,128 **** } info->relam = indexRelation->rd_rel->relam; ! info->pages = indexRelation->rd_rel->relpages; ! info->tuples = indexRelation->rd_rel->reltuples; info->amcostestimate = index_cost_estimator(indexRelation); /* --- 125,131 ---- } info->relam = indexRelation->rd_rel->relam; ! estimate_rel_size(indexRelation, &info->pages, &info->tuples); info->amcostestimate = index_cost_estimator(indexRelation); /* *************** *** 170,180 **** rel->indexlist = indexinfos; ! rel->pages = relation->rd_rel->relpages; ! rel->tuples = relation->rd_rel->reltuples; /* XXX keep the lock here? */ heap_close(relation, AccessShareLock); } /* --- 173,245 ---- rel->indexlist = indexinfos; ! estimate_rel_size(relation, &rel->pages, &rel->tuples); /* XXX keep the lock here? */ heap_close(relation, AccessShareLock); + } + + /* + * estimate_rel_size - estimate # pages and # tuples in a table or index + */ + static void + estimate_rel_size(Relation rel, + BlockNumber *pages, double *tuples) + { + #if 1 + BlockNumber relpages; + double reltuples; + double density; + + switch (rel->rd_rel->relkind) + { + case RELKIND_RELATION: + case RELKIND_INDEX: + case RELKIND_TOASTVALUE: + /* it has storage, ok to call the smgr */ + *pages = RelationGetNumberOfBlocks(rel); + /* coerce values in pg_class to more desirable types */ + relpages = (BlockNumber) rel->rd_rel->relpages; + reltuples = (double) rel->rd_rel->reltuples; + /* estimate number of tuples from previous tuple density */ + if (relpages > 0) + density = reltuples / (double) relpages; + else if (reltuples > 0) + { + /* + * Assume that the value in reltuples is meant as a direct + * density estimate. + */ + density = reltuples; + } + else + { + /* + * When we have no data because the relation was truncated, + * use a default that matches the pre-8.0 default settings + * (reltuples = 1000, relpages = 10). An alternative is + * to estimate an average tuple width on the basis of column + * datatypes, but it's unclear that that's worth the trouble. + */ + density = 100; + } + *tuples = rint(density * (double) *pages); + break; + case RELKIND_SEQUENCE: + /* Sequences always have a known size */ + *pages = 1; + *tuples = 1; + break; + default: + /* else it has no disk storage; probably shouldn't get here? */ + *pages = 0; + *tuples = 0; + break; + } + #else /* pre-8.0 behavior */ + *pages = (BlockNumber) rel->rd_rel->relpages; + *tuples = (double) rel->rd_rel->reltuples; + #endif } /* *** src/include/nodes/relation.h.orig Fri Nov 26 16:08:35 2004 --- src/include/nodes/relation.h Mon Nov 29 16:41:52 2004 *************** *** 255,261 **** Oid indexoid; /* OID of the index relation */ /* statistics from pg_class */ ! long pages; /* number of disk pages in index */ double tuples; /* number of index tuples in index */ /* index descriptor information */ --- 255,261 ---- Oid indexoid; /* OID of the index relation */ /* statistics from pg_class */ ! BlockNumber pages; /* number of disk pages in index */ double tuples; /* number of index tuples in index */ /* index descriptor information */