*** /tmp/DqhRGF_vacuum.c 2018-03-02 18:43:54.448046402 +0300
--- src/backend/commands/vacuum.c 2018-03-02 18:22:04.223070206 +0300
***************
*** 780,791 ****
BlockNumber scanned_pages,
double scanned_tuples)
{
- BlockNumber old_rel_pages = relation->rd_rel->relpages;
double old_rel_tuples = relation->rd_rel->reltuples;
! double old_density;
! double new_density;
double multiplier;
- double updated_density;
/* If we did scan the whole table, just use the count as-is */
if (scanned_pages >= total_pages)
--- 780,788 ----
BlockNumber scanned_pages,
double scanned_tuples)
{
double old_rel_tuples = relation->rd_rel->reltuples;
! double new_rel_tuples;
double multiplier;
/* If we did scan the whole table, just use the count as-is */
if (scanned_pages >= total_pages)
***************
*** 801,839 ****
return old_rel_tuples;
/*
! * If old value of relpages is zero, old density is indeterminate; we
! * can't do much except scale up scanned_tuples to match total_pages.
*/
! if (old_rel_pages == 0)
! return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
/*
! * Okay, we've covered the corner cases. The normal calculation is to
! * convert the old measurement to a density (tuples per page), then update
! * the density using an exponential-moving-average approach, and finally
! * compute reltuples as updated_density * total_pages.
! *
! * For ANALYZE, the moving average multiplier is just the fraction of the
! * table's pages we scanned. This is equivalent to assuming that the
! * tuple density in the unscanned pages didn't change. Of course, it
! * probably did, if the new density measurement is different. But over
! * repeated cycles, the value of reltuples will converge towards the
! * correct value, if repeated measurements show the same new density.
! *
! * For VACUUM, the situation is a bit different: we have looked at a
! * nonrandom sample of pages, but we know for certain that the pages we
! * didn't look at are precisely the ones that haven't changed lately.
! * Thus, there is a reasonable argument for doing exactly the same thing
! * as for the ANALYZE case, that is use the old density measurement as the
! * value for the unscanned pages.
! *
! * This logic could probably use further refinement.
*/
- old_density = old_rel_tuples / old_rel_pages;
- new_density = scanned_tuples / scanned_pages;
multiplier = (double) scanned_pages / (double) total_pages;
! updated_density = old_density + (new_density - old_density) * multiplier;
! return floor(updated_density * total_pages + 0.5);
}
--- 798,825 ----
return old_rel_tuples;
/*
! * Estimate the total number of tuples based on the density of scanned
! * tuples.
*/
! new_rel_tuples = floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
/*
! * ANALYZE scans a representative subset of pages, so we trust its density
! * estimate.
! */
! if (is_analyze)
! return new_rel_tuples;
!
! /*
! * VACUUM scanned a nonrandom sample of pages, so we can't just scale up its
! * result. For the portion of the table it didn't scan, use the old number of tuples,
! * and for the portion it did scan, use the number it reported. This is
! * effectively an exponential moving average with an adaptive factor.
*/
multiplier = (double) scanned_pages / (double) total_pages;
! return floor(old_rel_tuples * (1. - multiplier)
! + new_rel_tuples * multiplier
! + 0.5);
}