diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 88cf8b3933..31ef39bf10 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -917,7 +917,7 @@ pgbench options d default_seed - seed used in hash functions by default + seed used in hash and pseudo-random permutation functions by default @@ -1370,6 +1370,13 @@ pgbench options d pow(2.0, 10), power(2.0, 10) 1024.0 + + pr_perm(i, size [, seed ] ) + integer + pseudo-random permutation in [0,size) + pr_perm(0, 4) + 0, 1, 2 or 3 + random(lb, ub) integer @@ -1531,6 +1538,21 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / + + Function pr_perm implements a pseudo-random permutation. + It permutes integers in [0, size) using a seed by applying rounds of + simple invertible functions, similarly to an encryption function, + although beware that it is not at all cryptographically secure. + It allows to mix the output of non uniform random functions so that + values drawn more often are not correlated. + Values outside the interval are interpreted modulo the size. + The function errors if size is not positive. + If no seed is provided, :default_seed is used. + Compared to hash functions discussed above, the function + ensures that a perfect permutation is applied: there are no collisions + nor holes in the output values. + + As an example, the full definition of the built-in TPC-B-like transaction is: diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index f7c56cc6a3..762a62959b 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -19,6 +19,7 @@ #define PGBENCH_NARGS_VARIABLE (-1) #define PGBENCH_NARGS_CASE (-2) #define PGBENCH_NARGS_HASH (-3) +#define PGBENCH_NARGS_PRPERM (-4) PgBenchExpr *expr_parse_result; @@ -366,6 +367,9 @@ static const struct { "hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A }, + { + "pr_perm", PGBENCH_NARGS_PRPERM, PGBENCH_PRPERM + }, /* keep as last array element */ { NULL, 0, 0 @@ -478,6 +482,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args) } break; + /* pseudo-random permutation function with optional seed argument */ + case PGBENCH_NARGS_PRPERM: + if (len < 2 || len > 3) + expr_yyerror_more(yyscanner, "unexpected number of arguments", + PGBENCH_FUNCTIONS[fnumber].fname); + + if (len == 2) + { + PgBenchExpr *var = make_variable("default_seed"); + args = make_elist(var, args); + } + break; + /* common case: positive arguments number */ default: Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0); diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 41b756c089..7d21644c77 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -986,6 +986,119 @@ getHashMurmur2(int64 val, uint64 seed) return (int64) result; } +/* pseudo-random permutation */ + +/* 16 so that % 16 can be optimized to & 0x0f */ +#define PRP_PRIMES 16 +/* 27-29 bits mega primes from https://primes.utm.edu/lists/small/millions/ */ +static int64 primes[PRP_PRIMES] = { + INT64CONST(122949829), + INT64CONST(141650963), + INT64CONST(160481219), + INT64CONST(179424691), + INT64CONST(198491329), + INT64CONST(217645199), + INT64CONST(236887699), + INT64CONST(256203221), + INT64CONST(275604547), + INT64CONST(295075153), + INT64CONST(314606891), + INT64CONST(334214467), + INT64CONST(353868019), + INT64CONST(373587911), + INT64CONST(393342743), + INT64CONST(413158523) +}; + +/* how many "encryption" rounds to apply */ +#define PRP_ROUNDS 4 + +/* return largest mask in 0 .. n-1 */ +static uint64 compute_prp_mask(uint64 n) +{ + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + n |= n >> 32; + return n >> 1; +} + +/* Donald Knuth linear congruential generator */ +#define DK_LCG_MUL INT64CONST(6364136223846793005) +#define DK_LCG_INC INT64CONST(1442695040888963407) + +/* do not use all small bits */ +#define LCG_SHIFT 13 + +/* + * PRP: parametric pseudo-random permutation + * + * Result in [0, size) is a permutation for inputs in the same set. + * + * Note that this function does not pass statistical tests: eg + * permutations of 2, 3, 4 or 5 ints are not strictly equiprobable. + * However it is inexpensive compared to an actual encryption function, + * and the quality is good enough to avoid trivial correlations on + * large sizes, which is the expected use case. + * + * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE. + * PLEASE DO NOT USE FOR SUCH PURPOSE. + */ +static int64 +pseudorandom_perm(const int64 data, const int64 isize, const int64 seed) +{ + /* computations are performed on unsigned values */ + uint64 key = (uint64) seed; + uint64 size = (uint64) isize; + uint64 v = (uint64) data % size; + /* size-1: ensures 2 possibly overlapping halves */ + uint64 mask = compute_prp_mask(size-1); + + unsigned int i, p; + + /* nothing to permute */ + if (isize == 1) + return 0; + + Assert(isize >= 2); + + /* apply 4 rounds of bijective transformations: + * (1) scramble: partial xors on power-or-2 subsets + * (2) scatter: linear modulo + */ + for (i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % PRP_PRIMES) + { + uint64 t; + + /* first "half" whitening, for v in 0 .. mask */ + key = key * DK_LCG_MUL + DK_LCG_INC; + if (v <= mask) + v ^= (key >> LCG_SHIFT) & mask; + + /* second (possibly overlapping) "half" whitening */ + key = key * DK_LCG_MUL + DK_LCG_INC; + t = size - 1 - v; + if (t <= mask) + { + t ^= (key >> LCG_SHIFT) & mask; + v = size - 1 - t; + } + + /* at most 2 primes are skipped for a given size */ + while (unlikely(size % primes[p] == 0)) + p = (p + 1) % PRP_PRIMES; + + /* scatter values with a prime multiplication */ + key = key * DK_LCG_MUL + DK_LCG_INC; + v = (primes[p] * v + (key >> LCG_SHIFT)) % size; + } + + /* back to signed */ + return (int64) v; +} + /* * Initialize the given SimpleStats struct to all zeroes */ @@ -2319,6 +2432,26 @@ evalStandardFunc(TState *thread, CState *st, return true; } + case PGBENCH_PRPERM: + { + int64 val, size, seed; + Assert(nargs == 3); + + if (!coerceToInt(&vargs[0], &val) || + !coerceToInt(&vargs[1], &size) || + !coerceToInt(&vargs[2], &seed)) + return false; + + if (size < 1) + { + fprintf(stderr, "pr_perm size parameter must be >= 1\n"); + return false; + } + + setIntValue(retval, pseudorandom_perm(val, size, seed)); + return true; + } + default: /* cannot get here */ Assert(0); diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index 6983865b92..665c450c2c 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -99,7 +99,8 @@ typedef enum PgBenchFunction PGBENCH_IS, PGBENCH_CASE, PGBENCH_HASH_FNV1A, - PGBENCH_HASH_MURMUR2 + PGBENCH_HASH_MURMUR2, + PGBENCH_PRPERM } PgBenchFunction; typedef struct PgBenchExpr PgBenchExpr; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index 2fc021dde7..0aec384eca 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -322,6 +322,14 @@ pgbench( qr{command=96.: int 1\b}, # :scale qr{command=97.: int 0\b}, # :client_id qr{command=98.: int 5432\b}, # :random_seed + qr{command=99.: boolean true\b}, + qr{command=100.: boolean true\b}, + qr{command=101.: boolean true\b}, + qr{command=102.: boolean true\b}, + qr{command=103.: boolean true\b}, + qr{command=107.: boolean true\b}, + qr{command=108.: boolean true\b}, + qr{command=109.: boolean true\b}, ], 'pgbench expressions', { @@ -447,6 +455,24 @@ SELECT :v0, :v1, :v2, :v3; \set sc debug(:scale) \set ci debug(:client_id) \set rs debug(:random_seed) +-- pseudo-random permutation +\set t debug(pr_perm(0, 2) + pr_perm(1, 2) = 1) +\set t debug(pr_perm(0, 3) + pr_perm(1, 3) + pr_perm(2, 3) = 3) +\set t debug(pr_perm(0, 4) + pr_perm(1, 4) + pr_perm(2, 4) + pr_perm(3, 4) = 6) +\set t debug(pr_perm(0, 5) + pr_perm(1, 5) + pr_perm(2, 5) + pr_perm(3, 5) + pr_perm(4, 5) = 10) +\set t debug(pr_perm(0, 16) + pr_perm(1, 16) + pr_perm(2, 16) + pr_perm(3, 16) + \ + pr_perm(4, 16) + pr_perm(5, 16) + pr_perm(6, 16) + pr_perm(7, 16) + \ + pr_perm(8, 16) + pr_perm(9, 16) + pr_perm(10, 16) + pr_perm(11, 16) + \ + pr_perm(12, 16) + pr_perm(13, 16) + pr_perm(14, 16) + pr_perm(15, 16) = 120) +-- random sanity check +\set size random(2, 1000) +\set v random(0, :size - 1) +\set p pr_perm(:v, :size) +\set t debug(0 <= :p and :p < :size and :p = pr_perm(:v + :size, :size) and :p <> pr_perm(:v + 1, :size)) +-- actual values +\set t debug(pr_perm(:v, 1) = 0) +\set t debug(pr_perm(0, 2, 5432) = 0 and pr_perm(1, 2, 5432) = 1 and \ + pr_perm(0, 2, 5431) = 1 and pr_perm(1, 2, 5431) = 0) } }); @@ -731,6 +757,10 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i); [ 'bad boolean', 0, [qr{malformed variable.*trueXXX}], q{\set b :badtrue or true} + ], + [ + 'invalid pr_perm size', 0, + [qr{pr_perm size parameter must be >= 1}], q{\set i pr_perm(0, 0)} ],); diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index c1c2c1e3d4..ff02cfb46b 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -290,6 +290,16 @@ my @script_tests = ( 'too many arguments for hash', [qr{unexpected number of arguments \(hash\)}], { 'bad-hash-2.sql' => "\\set i hash(1,2,3)\n" } + ], + [ + 'not enough arguments for pr_perm', + [qr{unexpected number of arguments \(pr_perm\)}], + { 'bad-pr_perm-1.sql' => "\\set i pr_perm(1)\n" } + ], + [ + 'too many arguments for pr_perm', + [qr{unexpected number of arguments \(pr_perm\)}], + { 'bad-pr_perm-2.sql' => "\\set i pr_perm(1, 2, 3, 4)\n" } ],); for my $t (@script_tests)