diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 44a4b3f..a2c606b 100644
*** a/src/backend/commands/analyze.c
--- b/src/backend/commands/analyze.c
*************** compute_distinct_stats(VacAttrStatsP sta
*** 2120,2128 ****
  		 * we are able to generate a complete MCV list (all the values in the
  		 * sample will fit, and we think these are all the ones in the table),
  		 * then do so.  Otherwise, store only those values that are
! 		 * significantly more common than the (estimated) average. We set the
! 		 * threshold rather arbitrarily at 25% more than average, with at
! 		 * least 2 instances in the sample.
  		 */
  		if (track_cnt < track_max && toowide_cnt == 0 &&
  			stats->stadistinct > 0 &&
--- 2120,2138 ----
  		 * we are able to generate a complete MCV list (all the values in the
  		 * sample will fit, and we think these are all the ones in the table),
  		 * then do so.  Otherwise, store only those values that are
! 		 * significantly more common than the ones we omit.  We determine that
! 		 * by considering the values in frequency order, and accepting each
! 		 * one if it is at least 50% more common than the average among the
! 		 * values after it.  The 50% threshold is somewhat arbitrary.
! 		 *
! 		 * Note that the 50% rule will never accept a value with count 1,
! 		 * since all the values have count at least 1; this is a property we
! 		 * desire, since there's no very good reason to assume that a
! 		 * single-occurrence value is an MCV and not just a random non-MCV.
! 		 *
! 		 * We need a special rule for the very last value.  If we get to it,
! 		 * we'll accept it if it's at least 1% of the non-null samples and has
! 		 * count more than 1.
  		 */
  		if (track_cnt < track_max && toowide_cnt == 0 &&
  			stats->stadistinct > 0 &&
*************** compute_distinct_stats(VacAttrStatsP sta
*** 2133,2153 ****
  		}
  		else
  		{
! 			/* d here is the same as d in the Haas-Stokes formula */
  			int			d = nonnull_cnt - summultiple + nmultiple;
! 			double		avgcount,
! 						mincount;
  
! 			/* estimate # occurrences in sample of a typical nonnull value */
! 			avgcount = (double) nonnull_cnt / (double) d;
! 			/* set minimum threshold count to store a value */
! 			mincount = avgcount * 1.25;
! 			if (mincount < 2)
! 				mincount = 2;
  			if (num_mcv > track_cnt)
  				num_mcv = track_cnt;
  			for (i = 0; i < num_mcv; i++)
  			{
  				if (track[i].count < mincount)
  				{
  					num_mcv = i;
--- 2143,2181 ----
  		}
  		else
  		{
! 			/* d here is initially the same as d in the Haas-Stokes formula */
  			int			d = nonnull_cnt - summultiple + nmultiple;
! 			int			remaining_samples = nonnull_cnt;
  
! 			/* can't store more MCVs than we tracked ... */
  			if (num_mcv > track_cnt)
  				num_mcv = track_cnt;
+ 			/* locate first value we're not going to store as an MCV */
  			for (i = 0; i < num_mcv; i++)
  			{
+ 				double		avgcount,
+ 							mincount;
+ 
+ 				/* remove current value from remaining_samples and d */
+ 				remaining_samples -= track[i].count;
+ 				d--;
+ 				if (d > 0)
+ 				{
+ 					/* get avg # occurrences of distinct values after this */
+ 					avgcount = (double) remaining_samples / (double) d;
+ 					/* set minimum count to accept a value (is surely > 1) */
+ 					mincount = avgcount * 1.50;
+ 				}
+ 				else
+ 				{
+ 					/* last value, use 1% rule */
+ 					mincount = nonnull_cnt * 0.01;
+ 
+ 					/* here, we need a clamp to avoid accepting count 1 */
+ 					if (mincount < 2)
+ 						mincount = 2;
+ 				}
+ 				/* if this value falls below threshold, we're done */
  				if (track[i].count < mincount)
  				{
  					num_mcv = i;
*************** compute_scalar_stats(VacAttrStatsP stats
*** 2375,2381 ****
  						/*
  						 * Found a new item for the mcv list; find its
  						 * position, bubbling down old items if needed. Loop
! 						 * invariant is that j points at an empty/ replaceable
  						 * slot.
  						 */
  						int			j;
--- 2403,2409 ----
  						/*
  						 * Found a new item for the mcv list; find its
  						 * position, bubbling down old items if needed. Loop
! 						 * invariant is that j points at an empty/replaceable
  						 * slot.
  						 */
  						int			j;
*************** compute_scalar_stats(VacAttrStatsP stats
*** 2475,2488 ****
  		 * we are able to generate a complete MCV list (all the values in the
  		 * sample will fit, and we think these are all the ones in the table),
  		 * then do so.  Otherwise, store only those values that are
! 		 * significantly more common than the (estimated) average. We set the
! 		 * threshold rather arbitrarily at 25% more than average, with at
! 		 * least 2 instances in the sample.  Also, we won't suppress values
! 		 * that have a frequency of at least 1/K where K is the intended
! 		 * number of histogram bins; such values might otherwise cause us to
! 		 * emit duplicate histogram bin boundaries.  (We might end up with
! 		 * duplicate histogram entries anyway, if the distribution is skewed;
! 		 * but we prefer to treat such values as MCVs if at all possible.)
  		 */
  		if (track_cnt == ndistinct && toowide_cnt == 0 &&
  			stats->stadistinct > 0 &&
--- 2503,2532 ----
  		 * we are able to generate a complete MCV list (all the values in the
  		 * sample will fit, and we think these are all the ones in the table),
  		 * then do so.  Otherwise, store only those values that are
! 		 * significantly more common than the ones we omit.  We determine that
! 		 * by considering the values in frequency order, and accepting each
! 		 * one if it is at least 50% more common than the average among the
! 		 * values after it.  The 50% threshold is somewhat arbitrary.
! 		 *
! 		 * We need a special rule for the very last value.  If we get to it,
! 		 * we'll accept it if it's at least 1% of the non-null samples and has
! 		 * count more than 1.
! 		 *
! 		 * Also, we will treat values as MCVs if they have a frequency of at
! 		 * least 1/K where K is the intended number of histogram entries.
! 		 * Relegating such values to the histogram might cause us to emit
! 		 * duplicate histogram entries.  (We might get duplicate histogram
! 		 * entries anyway, if the distribution is skewed; but we prefer to
! 		 * treat such values as MCVs if at all possible.)  For this purpose,
! 		 * measure the frequency with respect to the population represented by
! 		 * the histogram; so in the loop below, cur_remaining_samples/num_hist
! 		 * is the correct calculation.
! 		 *
! 		 * In any case, unless we believe we have a complete MCV list, we will
! 		 * not accept an MCV value with count 1, since there's no very good
! 		 * reason to assume that a single-occurrence value is an MCV and not
! 		 * just a random non-MCV.  This is automatic with the 50% rule but
! 		 * needs enforcement with the other ones.
  		 */
  		if (track_cnt == ndistinct && toowide_cnt == 0 &&
  			stats->stadistinct > 0 &&
*************** compute_scalar_stats(VacAttrStatsP stats
*** 2490,2523 ****
  		{
  			/* Track list includes all values seen, and all will fit */
  			num_mcv = track_cnt;
  		}
  		else
  		{
! 			/* d here is the same as d in the Haas-Stokes formula */
  			int			d = ndistinct + toowide_cnt;
! 			double		avgcount,
! 						mincount,
! 						maxmincount;
  
! 			/* estimate # occurrences in sample of a typical nonnull value */
! 			avgcount = (double) values_cnt / (double) d;
! 			/* set minimum threshold count to store a value */
! 			mincount = avgcount * 1.25;
! 			if (mincount < 2)
! 				mincount = 2;
! 			/* don't let threshold exceed 1/K, however */
! 			maxmincount = (double) values_cnt / (double) num_bins;
! 			if (mincount > maxmincount)
! 				mincount = maxmincount;
  			if (num_mcv > track_cnt)
  				num_mcv = track_cnt;
  			for (i = 0; i < num_mcv; i++)
  			{
  				if (track[i].count < mincount)
  				{
  					num_mcv = i;
  					break;
  				}
  			}
  		}
  
--- 2534,2605 ----
  		{
  			/* Track list includes all values seen, and all will fit */
  			num_mcv = track_cnt;
+ 			/* Nothing left for the histogram */
+ 			num_hist = 0;
  		}
  		else
  		{
! 			/* d here is initially the same as d in the Haas-Stokes formula */
  			int			d = ndistinct + toowide_cnt;
! 			int			remaining_samples = values_cnt;
  
! 			/*
! 			 * num_hist is the planned histogram size; it's limited by the
! 			 * number of distinct sample vals not absorbed into the MCV list.
! 			 * Start with assumption that nothing is absorbed into MCV list.
! 			 */
! 			num_hist = Min(ndistinct, num_bins + 1);
! 
! 			/* can't store more MCVs than we tracked ... */
  			if (num_mcv > track_cnt)
  				num_mcv = track_cnt;
+ 			/* locate first value we're not going to store as an MCV */
  			for (i = 0; i < num_mcv; i++)
  			{
+ 				int			cur_remaining_samples = remaining_samples;
+ 				double		avgcount,
+ 							mincount;
+ 
+ 				/* remove current value from remaining_samples and d */
+ 				remaining_samples -= track[i].count;
+ 				d--;
+ 				if (d > 0)
+ 				{
+ 					/* get avg # occurrences of distinct values after this */
+ 					avgcount = (double) remaining_samples / (double) d;
+ 					/* set minimum count to accept a value (is surely > 1) */
+ 					mincount = avgcount * 1.50;
+ 				}
+ 				else
+ 				{
+ 					/* last value, use 1% rule */
+ 					mincount = values_cnt * 0.01;
+ 
+ 					/* here, we need a clamp to avoid accepting count 1 */
+ 					if (mincount < 2)
+ 						mincount = 2;
+ 				}
+ 
+ 				/* don't let threshold exceed 1/K, however */
+ 				if (num_hist >= 2)		/* else we won't make a histogram */
+ 				{
+ 					double		hfreq;
+ 
+ 					hfreq = (double) cur_remaining_samples / (double) num_hist;
+ 					if (mincount > hfreq)
+ 						mincount = hfreq;
+ 					/* hfreq could be 1, so clamp to avoid accepting count 1 */
+ 					if (mincount < 2)
+ 						mincount = 2;
+ 				}
+ 				/* if this value falls below threshold, we're done */
  				if (track[i].count < mincount)
  				{
  					num_mcv = i;
  					break;
  				}
+ 				/* update planned histogram size, removing this value */
+ 				num_hist = Min(ndistinct - (i + 1), num_bins + 1);
  			}
  		}
  
*************** compute_scalar_stats(VacAttrStatsP stats
*** 2560,2568 ****
  		 * values not accounted for in the MCV list.  (This ensures the
  		 * histogram won't collapse to empty or a singleton.)
  		 */
- 		num_hist = ndistinct - num_mcv;
- 		if (num_hist > num_bins)
- 			num_hist = num_bins + 1;
  		if (num_hist >= 2)
  		{
  			MemoryContext old_context;
--- 2642,2647 ----
