diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 88cf8b3..31ef39b 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -917,7 +917,7 @@ pgbench options d default_seed - seed used in hash functions by default + seed used in hash and pseudo-random permutation functions by default @@ -1371,6 +1371,13 @@ pgbench options d 1024.0 + pr_perm(i, size [, seed ] ) + integer + pseudo-random permutation in [0,size) + pr_perm(0, 4) + 0, 1, 2 or 3 + + random(lb, ub) integer uniformly-distributed random integer in [lb, ub] @@ -1532,6 +1539,21 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / + Function pr_perm implements a pseudo-random permutation. + It permutes integers in [0, size) using a seed by applying rounds of + simple invertible functions, similarly to an encryption function, + although beware that it is not at all cryptographically secure. + It allows to mix the output of non uniform random functions so that + values drawn more often are not correlated. + Values outside the interval are interpreted modulo the size. + The function errors if size is not positive. + If no seed is provided, :default_seed is used. + Compared to hash functions discussed above, the function + ensures that a perfect permutation is applied: there are no collisions + nor holes in the output values. + + + As an example, the full definition of the built-in TPC-B-like transaction is: diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index f7c56cc..762a629 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -19,6 +19,7 @@ #define PGBENCH_NARGS_VARIABLE (-1) #define PGBENCH_NARGS_CASE (-2) #define PGBENCH_NARGS_HASH (-3) +#define PGBENCH_NARGS_PRPERM (-4) PgBenchExpr *expr_parse_result; @@ -366,6 +367,9 @@ static const struct { "hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A }, + { + "pr_perm", PGBENCH_NARGS_PRPERM, PGBENCH_PRPERM + }, /* keep as last array element */ { NULL, 0, 0 @@ -478,6 +482,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args) } break; + /* pseudo-random permutation function with optional seed argument */ + case PGBENCH_NARGS_PRPERM: + if (len < 2 || len > 3) + expr_yyerror_more(yyscanner, "unexpected number of arguments", + PGBENCH_FUNCTIONS[fnumber].fname); + + if (len == 2) + { + PgBenchExpr *var = make_variable("default_seed"); + args = make_elist(var, args); + } + break; + /* common case: positive arguments number */ default: Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0); diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 41b756c..763bf6f 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -986,6 +986,215 @@ getHashMurmur2(int64 val, uint64 seed) return (int64) result; } +/* pseudo-random permutation */ + +/* 16 so that % 16 can be optimized to & 0x0f */ +#define PRP_PRIMES 16 +/* 27-29 bits mega primes from https://primes.utm.edu/lists/small/millions/ */ +static int64 primes[PRP_PRIMES] = { + INT64CONST(122949829), + INT64CONST(141650963), + INT64CONST(160481219), + INT64CONST(179424691), + INT64CONST(198491329), + INT64CONST(217645199), + INT64CONST(236887699), + INT64CONST(256203221), + INT64CONST(275604547), + INT64CONST(295075153), + INT64CONST(314606891), + INT64CONST(334214467), + INT64CONST(353868019), + INT64CONST(373587911), + INT64CONST(393342743), + INT64CONST(413158523) +}; + +/* how many "encryption" rounds to apply */ +#define PRP_ROUNDS 4 + +/* return largest mask in 0 .. n-1 */ +static uint64 compute_prp_mask(uint64 n) +{ + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + n |= n >> 32; + return n >> 1; +} + +/* + * Calculate (x * y) % m, where x and y in [0, 2^64), m in [1, 2^64). + * + * If x or y is greater than 2^32, improved interleaved modular + * multiplication algorithm is used to avoid overflow. + */ +static uint64 modular_multiplicate(uint64 x, uint64 y, const uint64 m) +{ + int i, bits; + uint64 r = 0; + + Assert(1 <= m); + + /* Because of (x * y) % m = (x % m * y % m) % m */ + if (x >= m) + x %= m; + if (y >= m) + y %= m; + + /* Return the trivial result. */ + if (x == 0 || y == 0 || m == 1) + return 0; + + /* Return the result if (x * y) can be multiplicated without overflow. */ + if ((x | y) < (0xffffffff)) + return (x * y) % m; + + /* To reduce the for loop in the algorithm below. */ + if (x < y) + { + uint64 tmp = x; + x = y; + y = tmp; + } + + /* Interleaved modular multiplication algorithm [1] + * + * This algorithm is usually used in the field of digital circuit + * design. + * + * Input: X, Y, M; 0 <= X, Y <= M; + * Output: R = X * Y mod M; + * bits: number of bits of Y + * Y[i]: i th bit of Y + * + * 1. R = 0; + * 2. for (i = bits - 1; i >= 0; i--) { + * 3. R = 2 * R; + * 4. if (Y[i] == 0x1) + * 5. R += X; + * 6. if (R >= M) R -= M; + * 7. if (R >= M) R -= M; + * } + * + * In Steps 3 and 5, overflow should be avoided. + * Steps 6 and 7 can be instead of a modular operation (R %= M). + * + * Reference + * [1] D.N. Amanor, et al, "Efficient hardware architecture for + * modular multiplication on FPGAs", in Field Programmable + * Logic and Apllications, 2005. International Conference on, + * Aug 2005, pp. 539-542. + */ + + bits = 64; + while (bits > 0 && (y >> (64 - bits) | 0x1) == 0) + bits--; + + for (i = bits - 1; i >= 0; i--) + { + if (r > 0x7fffffffffffffff) + /* To avoid overflow, transform from (2 * r) to + * (2 * r) % m, and further transform to + * mathematically equivalent form shown below: + */ + r = m - ((m - r) << 1); + else + r <<= 1; + + if ((y >> i) & 0x1) + { + /* Calculate (r + x) without overflow using same + * transformations described in the above comment. + */ + if (m > 0x7fffffffffffffff) + r = ((m - r) > x) ? r + x : r + x - m; + else + r = (r > m) ? r - m + x : r + x; + } + + r %= m; + } + + return r; +} + +/* Donald Knuth linear congruential generator */ +#define DK_LCG_MUL INT64CONST(6364136223846793005) +#define DK_LCG_INC INT64CONST(1442695040888963407) + +/* do not use all small bits */ +#define LCG_SHIFT 13 + +/* + * PRP: parametric pseudo-random permutation + * + * Result in [0, size) is a permutation for inputs in the same set. + * + * Note that this function does not pass statistical tests: eg + * permutations of 2, 3, 4 or 5 ints are not strictly equiprobable. + * However it is inexpensive compared to an actual encryption function, + * and the quality is good enough to avoid trivial correlations on + * large sizes, which is the expected use case. + * + * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE. + * PLEASE DO NOT USE FOR SUCH PURPOSE. + */ +static int64 +pseudorandom_perm(const int64 data, const int64 isize, const int64 seed) +{ + /* computations are performed on unsigned values */ + uint64 key = (uint64) seed; + uint64 size = (uint64) isize; + uint64 v = (uint64) data % size; + /* size-1: ensures 2 possibly overlapping halves */ + uint64 mask = compute_prp_mask(size-1); + + unsigned int i, p; + + /* nothing to permute */ + if (isize == 1) + return 0; + + Assert(isize >= 2); + + /* apply 4 rounds of bijective transformations: + * (1) scramble: partial xors on power-or-2 subsets + * (2) scatter: linear modulo + */ + for (i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % PRP_PRIMES) + { + uint64 t; + + /* first "half" whitening, for v in 0 .. mask */ + key = key * DK_LCG_MUL + DK_LCG_INC; + if (v <= mask) + v ^= (key >> LCG_SHIFT) & mask; + + /* second (possibly overlapping) "half" whitening */ + key = key * DK_LCG_MUL + DK_LCG_INC; + t = size - 1 - v; + if (t <= mask) + { + t ^= (key >> LCG_SHIFT) & mask; + v = size - 1 - t; + } + + /* at most 2 primes are skipped for a given size */ + while (unlikely(size % primes[p] == 0)) + p = (p + 1) % PRP_PRIMES; + + /* scatter values with a prime multiplication */ + key = key * DK_LCG_MUL + DK_LCG_INC; + v = (modular_multiplicate((uint64)primes[p], v, size) + (key >> LCG_SHIFT)) % size; + } + + /* back to signed */ + return (int64) v; +} + /* * Initialize the given SimpleStats struct to all zeroes */ @@ -2319,6 +2528,26 @@ evalStandardFunc(TState *thread, CState *st, return true; } + case PGBENCH_PRPERM: + { + int64 val, size, seed; + Assert(nargs == 3); + + if (!coerceToInt(&vargs[0], &val) || + !coerceToInt(&vargs[1], &size) || + !coerceToInt(&vargs[2], &seed)) + return false; + + if (size < 1) + { + fprintf(stderr, "pr_perm size parameter must be >= 1\n"); + return false; + } + + setIntValue(retval, pseudorandom_perm(val, size, seed)); + return true; + } + default: /* cannot get here */ Assert(0); diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index 6983865..665c450 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -99,7 +99,8 @@ typedef enum PgBenchFunction PGBENCH_IS, PGBENCH_CASE, PGBENCH_HASH_FNV1A, - PGBENCH_HASH_MURMUR2 + PGBENCH_HASH_MURMUR2, + PGBENCH_PRPERM } PgBenchFunction; typedef struct PgBenchExpr PgBenchExpr; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index 2fc021d..0aec384 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -322,6 +322,14 @@ pgbench( qr{command=96.: int 1\b}, # :scale qr{command=97.: int 0\b}, # :client_id qr{command=98.: int 5432\b}, # :random_seed + qr{command=99.: boolean true\b}, + qr{command=100.: boolean true\b}, + qr{command=101.: boolean true\b}, + qr{command=102.: boolean true\b}, + qr{command=103.: boolean true\b}, + qr{command=107.: boolean true\b}, + qr{command=108.: boolean true\b}, + qr{command=109.: boolean true\b}, ], 'pgbench expressions', { @@ -447,6 +455,24 @@ SELECT :v0, :v1, :v2, :v3; \set sc debug(:scale) \set ci debug(:client_id) \set rs debug(:random_seed) +-- pseudo-random permutation +\set t debug(pr_perm(0, 2) + pr_perm(1, 2) = 1) +\set t debug(pr_perm(0, 3) + pr_perm(1, 3) + pr_perm(2, 3) = 3) +\set t debug(pr_perm(0, 4) + pr_perm(1, 4) + pr_perm(2, 4) + pr_perm(3, 4) = 6) +\set t debug(pr_perm(0, 5) + pr_perm(1, 5) + pr_perm(2, 5) + pr_perm(3, 5) + pr_perm(4, 5) = 10) +\set t debug(pr_perm(0, 16) + pr_perm(1, 16) + pr_perm(2, 16) + pr_perm(3, 16) + \ + pr_perm(4, 16) + pr_perm(5, 16) + pr_perm(6, 16) + pr_perm(7, 16) + \ + pr_perm(8, 16) + pr_perm(9, 16) + pr_perm(10, 16) + pr_perm(11, 16) + \ + pr_perm(12, 16) + pr_perm(13, 16) + pr_perm(14, 16) + pr_perm(15, 16) = 120) +-- random sanity check +\set size random(2, 1000) +\set v random(0, :size - 1) +\set p pr_perm(:v, :size) +\set t debug(0 <= :p and :p < :size and :p = pr_perm(:v + :size, :size) and :p <> pr_perm(:v + 1, :size)) +-- actual values +\set t debug(pr_perm(:v, 1) = 0) +\set t debug(pr_perm(0, 2, 5432) = 0 and pr_perm(1, 2, 5432) = 1 and \ + pr_perm(0, 2, 5431) = 1 and pr_perm(1, 2, 5431) = 0) } }); @@ -731,6 +757,10 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i); [ 'bad boolean', 0, [qr{malformed variable.*trueXXX}], q{\set b :badtrue or true} + ], + [ + 'invalid pr_perm size', 0, + [qr{pr_perm size parameter must be >= 1}], q{\set i pr_perm(0, 0)} ],); diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index c1c2c1e..ff02cfb 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -290,6 +290,16 @@ my @script_tests = ( 'too many arguments for hash', [qr{unexpected number of arguments \(hash\)}], { 'bad-hash-2.sql' => "\\set i hash(1,2,3)\n" } + ], + [ + 'not enough arguments for pr_perm', + [qr{unexpected number of arguments \(pr_perm\)}], + { 'bad-pr_perm-1.sql' => "\\set i pr_perm(1)\n" } + ], + [ + 'too many arguments for pr_perm', + [qr{unexpected number of arguments \(pr_perm\)}], + { 'bad-pr_perm-2.sql' => "\\set i pr_perm(1, 2, 3, 4)\n" } ],); for my $t (@script_tests)