diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 0ac40f1..da3c792 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -788,7 +788,7 @@ pgbench options dbname - \setrandom varname min max [ uniform | { gaussian | exponential } threshold ] + \setrandom varname min max [ uniform | { gaussian | exponential } param ] @@ -804,54 +804,63 @@ pgbench options dbname By default, or when uniform is specified, all values in the range are drawn with equal probability. Specifying gaussian or exponential options modifies this behavior; each - requires a mandatory threshold which determines the precise shape of the + requires a mandatory parameter which determines the precise shape of the distribution. For a Gaussian distribution, the interval is mapped onto a standard normal distribution (the classical bell-shaped Gaussian curve) truncated - at -threshold on the left and +threshold + at -param on the left and +param on the right. + Values in the middle of the interval are more likely to be drawn. To be precise, if PHI(x) is the cumulative distribution function of the standard normal distribution, with mean mu - defined as (max + min) / 2.0, then value i - between min and max inclusive is drawn - with probability: - - (PHI(2.0 * threshold * (i - min - mu + 0.5) / (max - min + 1)) - - PHI(2.0 * threshold * (i - min - mu - 0.5) / (max - min + 1))) / - (2.0 * PHI(threshold) - 1.0). - Intuitively, the larger the threshold, the more + defined as (max+min)/2, with + +f(x) = PHI(2 * param * (x-mu) / (max-min+1)) / (2 * PHI(param) - 1) + + then value i between min and + max inclusive is drawn with probability: + f(i+0.5) - f(i-0.5). + Intuitively, the larger the param, the more frequently values close to the middle of the interval are drawn, and the less frequently values close to the min and max bounds. 
- About 67% of values are drawn from the middle 1.0 / threshold - and 95% in the middle 2.0 / threshold; for instance, if - threshold is 4.0, 67% of values are drawn from the middle - quarter and 95% from the middle half of the interval. - The minimum threshold is 2.0 for performance of + About 67% of values are drawn from the middle 1/param, + that is a relative 0.5/param around the mean, + and 95% in the middle 2/param, that is + a relative 1/param around the mean; + for instance, if param is 4.0, 67% of values are drawn + from the middle quarter (1/4.0) of the interval + (i.e. from 3/8 to 5/8) + and 95% from the middle half (2/4.0) of the interval (second and third + quartiles). + The minimum param is 2.0 for performance of the Box-Muller transform. - For an exponential distribution, the threshold - parameter controls the distribution by truncating a quickly-decreasing - exponential distribution at threshold, and then + For an exponential distribution, the param parameter + controls the distribution by truncating a quickly-decreasing + exponential distribution at param, and then projecting onto integers between the bounds. - To be precise, value i between min and + To be precise, with + +f(x) = exp(-param * (x-min) / (max-min+1)) / (1 - exp(-param)) + + then value i between min and max inclusive is drawn with probability: - (exp(-threshold*(i-min)/(max+1-min)) - - exp(-threshold*(i+1-min)/(max+1-min))) / (1.0 - exp(-threshold)). - Intuitively, the larger the threshold, the more + f(i) - f(i+1). + Intuitively, the larger the param, the more frequently values close to min are accessed, and the less frequently values close to max are accessed. - The closer to 0 the threshold, the flatter (more uniform) the access + The closer to 0 the parameter, the flatter (more uniform) the access distribution. A crude approximation of the distribution is that the most frequent 1% values in the range, close to min, are drawn - threshold% of the time.
- The threshold value must be strictly positive. + param% of the time. + The param value must be strictly positive. diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index f2d435b..60ad22e 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -90,7 +90,7 @@ static int pthread_join(pthread_t th, void **thread_return); #define LOG_STEP_SECONDS 5 /* seconds between log messages */ #define DEFAULT_NXACTS 10 /* default nxacts */ -#define MIN_GAUSSIAN_THRESHOLD 2.0 /* minimum threshold for gauss */ +#define MIN_GAUSSIAN_PARAM 2.0 /* minimum parameter for gauss */ int nxacts = 0; /* number of transactions per client */ int duration = 0; /* duration in seconds */ @@ -488,47 +488,47 @@ getrand(TState *thread, int64 min, int64 max) /* * random number generator: exponential distribution from min to max inclusive. - * the threshold is so that the density of probability for the last cut-off max - * value is exp(-threshold). + * the parameter is so that the density of probability for the last cut-off max + * value is exp(-param). 
*/ static int64 -getExponentialRand(TState *thread, int64 min, int64 max, double threshold) +getExponentialRand(TState *thread, int64 min, int64 max, double param) { double cut, uniform, rand; - Assert(threshold > 0.0); - cut = exp(-threshold); + Assert(param > 0.0); + cut = exp(-param); /* erand in [0, 1), uniform in (0, 1] */ uniform = 1.0 - pg_erand48(thread->random_state); /* - * inner expresion in (cut, 1] (if threshold > 0), rand in [0, 1) + * inner expression in (cut, 1] (if param > 0), rand in [0, 1) */ Assert((1.0 - cut) != 0.0); - rand = -log(cut + (1.0 - cut) * uniform) / threshold; + rand = -log(cut + (1.0 - cut) * uniform) / param; /* return int64 random number within between min and max */ return min + (int64) ((max - min + 1) * rand); } /* random number generator: gaussian distribution from min to max inclusive */ static int64 -getGaussianRand(TState *thread, int64 min, int64 max, double threshold) +getGaussianRand(TState *thread, int64 min, int64 max, double param) { double stdev; double rand; /* - * Get user specified random number from this loop, with -threshold < - * stdev <= threshold + * Get user specified random number from this loop, + * with -param <= stdev < param * * This loop is executed until the number is in the expected range. * - * As the minimum threshold is 2.0, the probability of looping is low: + * As the minimum parameter is 2.0, the probability of looping is low: * sqrt(-2 ln(r)) <= 2 => r >= e^{-2} ~ 0.135, then when taking the * average sinus multiplier as 2/pi, we have a 8.6% looping probability in - * the worst case. For a 5.0 threshold value, the looping probability is + * the worst case. For a 5.0 param value, the looping probability is * about e^{-5} * 2 / pi ~ 0.43%. */ do @@ -553,10 +553,10 @@ getGaussianRand(TState *thread, int64 min, int64 max, double threshold) * over.
*/ } - while (stdev < -threshold || stdev >= threshold); + while (stdev < -param || stdev >= param); - /* stdev is in [-threshold, threshold), normalization to [0,1) */ - rand = (stdev + threshold) / (threshold * 2.0); + /* stdev is in [-param, param), normalization to [0,1) */ + rand = (stdev + param) / (param * 2.0); /* return int64 random number within between min and max */ return min + (int64) ((max - min + 1) * rand); @@ -1483,7 +1483,7 @@ top: char *var; int64 min, max; - double threshold = 0; + double param = 0; char res; if (*argv == ':') @@ -1554,41 +1554,41 @@ top: { if ((var = getVariable(st, argv + 1)) == NULL) { - fprintf(stderr, "%s: invalid threshold number: \"%s\"\n", + fprintf(stderr, "%s: invalid parameter: \"%s\"\n", argv, argv); st->ecnt++; return true; } - threshold = strtod(var, NULL); + param = strtod(var, NULL); } else - threshold = strtod(argv, NULL); + param = strtod(argv, NULL); if (pg_strcasecmp(argv, "gaussian") == 0) { - if (threshold < MIN_GAUSSIAN_THRESHOLD) + if (param < MIN_GAUSSIAN_PARAM) { - fprintf(stderr, "gaussian threshold must be at least %f (not \"%s\")\n", MIN_GAUSSIAN_THRESHOLD, argv); + fprintf(stderr, "gaussian parameter must be at least %f (not \"%s\")\n", MIN_GAUSSIAN_PARAM, argv); st->ecnt++; return true; } #ifdef DEBUG - printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianRand(thread, min, max, threshold)); + printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getGaussianRand(thread, min, max, param)); #endif - snprintf(res, sizeof(res), INT64_FORMAT, getGaussianRand(thread, min, max, threshold)); + snprintf(res, sizeof(res), INT64_FORMAT, getGaussianRand(thread, min, max, param)); } else if (pg_strcasecmp(argv, "exponential") == 0) { - if (threshold <= 0.0) + if (param <= 0.0) { - fprintf(stderr, "exponential threshold must be greater than zero (not \"%s\")\n", argv); + fprintf(stderr, "exponential parameter must be greater 
than zero (not \"%s\")\n", argv); st->ecnt++; return true; } #ifdef DEBUG - printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getExponentialRand(thread, min, max, threshold)); + printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, getExponentialRand(thread, min, max, param)); #endif - snprintf(res, sizeof(res), INT64_FORMAT, getExponentialRand(thread, min, max, threshold)); + snprintf(res, sizeof(res), INT64_FORMAT, getExponentialRand(thread, min, max, param)); } } else /* this means an error somewhere in the parsing phase... */ @@ -2282,8 +2282,9 @@ process_commands(char *buf, const char *source, const int lineno) if (pg_strcasecmp(my_commands->argv, "setrandom") == 0) { /* - * parsing: \setrandom variable min max [uniform] \setrandom - * variable min max (gaussian|exponential) threshold + * parsing: + * \setrandom variable min max [uniform] + * \setrandom variable min max (gaussian|exponential) param */ if (my_commands->argc < 4) @@ -2308,7 +2309,7 @@ process_commands(char *buf, const char *source, const int lineno) if (my_commands->argc < 6) { syntax_error(source, lineno, my_commands->line, my_commands->argv, - "missing threshold argument", my_commands->argv, -1); + "missing parameter", my_commands->argv, -1); } else if (my_commands->argc > 6) {