diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index af2dea1..5b09f73 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -327,6 +327,26 @@ fi])# PGAC_C_BUILTIN_BSWAP64 +# PGAC_C_BUILTIN_CLZLL +# ------------------------- +# Check if the C compiler understands __builtin_clzll(), +# and define HAVE__BUILTIN_CLZLL if so. +# Both GCC & CLANG seem to have one. +AC_DEFUN([PGAC_C_BUILTIN_CLZLL], +[AC_CACHE_CHECK(for __builtin_clzll, pgac_cv__builtin_clzll, +[AC_COMPILE_IFELSE([AC_LANG_SOURCE( +[static unsigned long int x = __builtin_clzll(0xaabbccddeeff0011);] +)], +[pgac_cv__builtin_clzll=yes], +[pgac_cv__builtin_clzll=no])]) +if test x"$pgac_cv__builtin_clzll" = xyes ; then +AC_DEFINE(HAVE__BUILTIN_CLZLL, 1, + [Define to 1 if your compiler understands __builtin_clzll.]) +fi])# PGAC_C_BUILTIN_CLZLL + + + + # PGAC_C_BUILTIN_CONSTANT_P # ------------------------- # Check if the C compiler understands __builtin_constant_p(), diff --git a/configure b/configure index b7250d7..a6dc740 100755 --- a/configure +++ b/configure @@ -13951,6 +13951,30 @@ if test x"$pgac_cv__builtin_bswap64" = xyes ; then $as_echo "#define HAVE__BUILTIN_BSWAP64 1" >>confdefs.h fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzll" >&5 +$as_echo_n "checking for __builtin_clzll... " >&6; } +if ${pgac_cv__builtin_clzll+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +static unsigned long int x = __builtin_clzll(0xaabbccddeeff0011); + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv__builtin_clzll=yes +else + pgac_cv__builtin_clzll=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clzll" >&5 +$as_echo "$pgac_cv__builtin_clzll" >&6; } +if test x"$pgac_cv__builtin_clzll" = xyes ; then + +$as_echo "#define HAVE__BUILTIN_CLZLL 1" >>confdefs.h + +fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_constant_p" >&5 $as_echo_n "checking for __builtin_constant_p... " >&6; } if ${pgac_cv__builtin_constant_p+:} false; then : diff --git a/configure.in b/configure.in index de5f777..e872875 100644 --- a/configure.in +++ b/configure.in @@ -1485,6 +1485,7 @@ PGAC_C_TYPES_COMPATIBLE PGAC_C_BUILTIN_BSWAP16 PGAC_C_BUILTIN_BSWAP32 PGAC_C_BUILTIN_BSWAP64 +PGAC_C_BUILTIN_CLZLL PGAC_C_BUILTIN_CONSTANT_P PGAC_C_BUILTIN_UNREACHABLE PGAC_C_COMPUTED_GOTO diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 8c464c9..265a91d 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -917,7 +917,7 @@ pgbench options d default_seed - seed used in hash functions by default + seed used in hash and pseudo-random permutation functions by default @@ -1378,6 +1378,13 @@ pgbench options d 1024.0 + pr_perm(i, size [, seed ] ) + integer + pseudo-random permutation in [0,size) + pr_perm(0, 4) + 0, 1, 2 or 3 + + random(lb, ub) integer uniformly-distributed random integer in [lb, ub] @@ -1539,6 +1546,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / + Function pr_perm implements a pseudo-random permutation. + It allows to mix the output of non uniform random functions so that + values drawn more often are not trivially correlated. + It permutes integers in [0, size) using a seed by applying rounds of + simple invertible functions, similarly to an encryption function, + although beware that it is not at all cryptographically secure. + Compared to hash functions discussed above, the function + ensures that a perfect permutation is applied: there are no collisions + nor holes in the output values. + Values outside the interval are interpreted modulo the size. + The function errors if size is not positive. + If no seed is provided, :default_seed is used. + For a given size and seed, the function is fully deterministic: if two + permutations on the same size must not be correlated, use distinct seeds + as outlined in the previous example about hash functions. + + + As an example, the full definition of the built-in TPC-B-like transaction is: diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index bab6f8a..94fcebb 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -19,6 +19,7 @@ #define PGBENCH_NARGS_VARIABLE (-1) #define PGBENCH_NARGS_CASE (-2) #define PGBENCH_NARGS_HASH (-3) +#define PGBENCH_NARGS_PRPERM (-4) PgBenchExpr *expr_parse_result; @@ -370,6 +371,9 @@ static const struct { "hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A }, + { + "pr_perm", PGBENCH_NARGS_PRPERM, PGBENCH_PRPERM + }, /* keep as last array element */ { NULL, 0, 0 @@ -482,6 +486,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args) } break; + /* pseudo-random permutation function with optional seed argument */ + case PGBENCH_NARGS_PRPERM: + if (len < 2 || len > 3) + expr_yyerror_more(yyscanner, "unexpected number of arguments", + PGBENCH_FUNCTIONS[fnumber].fname); + + if (len == 2) + { + PgBenchExpr *var = make_variable("default_seed"); + args = make_elist(var, args); + } + break; + /* common case: positive arguments number */ default: Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0); diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 436764b..28bde41 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -1066,6 +1066,254 @@ getHashMurmur2(int64 val, uint64 seed) return (int64) result; } +/* pseudo-random permutation */ + +/* 16 so that % 16 can be optimized to & 0x0f */ +#define PRP_PRIMES 16 +/* + * 24 bits mega primes from https://primes.utm.edu/lists/small/millions/ + * the i-th prime, i \in [0, 15], is the first prime above 2^23 + i * 2^19 + */ +static uint64 primes[PRP_PRIMES] = { + UINT64CONST(8388617), + UINT64CONST(8912921), + UINT64CONST(9437189), + UINT64CONST(9961487), + UINT64CONST(10485767), + UINT64CONST(11010059), + UINT64CONST(11534351), + UINT64CONST(12058679), + UINT64CONST(12582917), + UINT64CONST(13107229), + UINT64CONST(13631489), + UINT64CONST(14155777), + UINT64CONST(14680067), + UINT64CONST(15204391), + UINT64CONST(15728681), + UINT64CONST(16252967) +}; + +/* how many "encryption" rounds to apply */ +#define PRP_ROUNDS 4 + +/* return smallest mask holding n */ +static uint64 compute_mask(uint64 n) +{ + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + n |= n >> 32; + return n; +} + +/* length of n binary representation */ +static int nbits(uint64 n) +{ +#ifdef HAVE__BUILTIN_CLZLL + return 64 - (n != 0 ? __builtin_clzll(n) : 64); +#else /* use clever no branching bitwise operator version */ + /* set all lower bits to 1 */ + n = compute_mask(n); + /* then count them */ + n -= (n >> 1) & UINT64CONST(0x5555555555555555); + n = (n & UINT64CONST(0x3333333333333333)) + ((n >> 2) & UINT64CONST(0x3333333333333333)); + n = (n + (n >> 4)) & UINT64CONST(0x0F0F0F0F0F0F0F0F); + return (n * UINT64CONST(0x0101010101010101)) >> 56; +#endif /* HAVE__BUILTIN_CLZLL */ +} + +/* + * Compute (x * y) % m, where x and y in [0, 2^64), m in [1, 2^64). + * + * Use improved interleaved modular multiplication algorithm to avoid + * overflows when necessary. + */ +static uint64 modular_multiply(uint64 x, uint64 y, const uint64 m) +{ + int i, bits; + uint64 r; + + Assert(m >= 1); + + /* Because of (x * y) % m = (x % m * y % m) % m */ + if (x >= m) + x %= m; + if (y >= m) + y %= m; + + /* Return the trivial result. */ + if (x == 0 || y == 0 || m == 1) + return 0; + + /* Return the result if (x * y) can be multiplied without overflow. */ + if (nbits(x) + nbits(y) <= 64) + return (x * y) % m; + + /* To reduce the for loop in the algorithm below, ensure y <= x. */ + if (x < y) + { + uint64 tmp = x; + x = y; + y = tmp; + } + + /* Interleaved modular multiplication algorithm from: + * + * D.N. Amanor et al., "Efficient hardware architecture for + * modular multiplication on FPGAs", in International Conference on + * Field Programmable Logic and Applications, Aug 2005, pp. 539-542. + * + * This algorithm is usually used in the field of digital circuit + * design. + * + * Input: X, Y, M; 0 <= X, Y <= M; + * Output: R = X * Y mod M; + * bits: number of bits of Y + * Y[i]: i th bit of Y + * + * 1. R = 0; + * 2. for (i = bits - 1; i >= 0; i--) { + * 3. R = 2 * R; + * 4. if (Y[i] == 0x1) + * 5. R += X; + * 6. if (R >= M) R -= M; + * 7. if (R >= M) R -= M; + * } + * + * In Steps 3 and 5, overflow should be avoided. + * Steps 6 and 7 can be instead a modular operation (R %= M). + */ + + bits = nbits(y); + r = 0; + + for (i = bits - 1; i >= 0; i--) + { + if (r > UINT64CONST(0x7fffffffffffffff)) + /* To avoid overflow, transform from (2 * r) to + * (2 * r) % m, and further transform to + * mathematically equivalent form shown below: + */ + r = m - ((m - r) << 1); + else + r <<= 1; + + if ((y >> i) & 0x1) + { + /* Compute (r + x) without overflow using same + * transformations described in the above comment. + */ + if (m > UINT64CONST(0x7fffffffffffffff)) + r = ((m - r) > x) ? r + x : r + x - m; + else + r = (r > m) ? r + x - m : r + x; + } + + r %= m; + } + + return r; +} + +/* Donald Knuth linear congruential generator */ +#define DK_LCG_MUL UINT64CONST(6364136223846793005) +#define DK_LCG_INC UINT64CONST(1442695040888963407) + +/* Calculate (key * DK_LCG_MUL + DK_LCG_INC) % 2^64 without overflow. + */ +static uint64 dk_lcg(uint64 key) +{ + uint64 r = modular_multiply(key, DK_LCG_MUL, UINT64CONST(0xffffffffffffffff)); + + if ((UINT64CONST(0xffffffffffffffff) - r) > DK_LCG_INC) + r += DK_LCG_INC; + else + r -= UINT64CONST(0xffffffffffffffff) - DK_LCG_INC; + + return r; +} + +/* do not use all small bits */ +#define LCG_SHIFT 13 + +/* + * PRP: parametric pseudo-random permutation + * + * Result in [0, size) is a permutation for inputs in the same set. + * + * Note that this function does not pass statistical tests: eg + * permutations of 2, 3, 4 or 5 ints are not strictly equiprobable. + * Things worsen for large sizes as there are many more permutations + * (size!) than seeds to select them (2^64 < 21!). + * However it is inexpensive compared to an actual encryption function, + * and the quality is good enough to avoid trivial correlations on + * large sizes, which is the expected use case. + * + * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE. + * DO NOT USE FOR SUCH PURPOSE. + */ +static int64 +pseudorandom_perm(const int64 data, const int64 isize, const int64 seed) +{ + /* computations are performed on unsigned values */ + uint64 size = (uint64) isize; + uint64 v = (uint64) data % size; + uint64 key = (uint64) seed; + /* size-1: ensures 2 possibly overlapping halves */ + uint64 mask = compute_mask(size-1) >> 1; + + unsigned int i, p; + + /* nothing to permute */ + if (isize == 1) + return 0; + + Assert(isize >= 2); + + /* apply 4 rounds of bijective transformations: + * (1) scramble: partial xors on overlapping power-or-2 subsets + * (2) scatter: linear modulo + */ + for (i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % PRP_PRIMES) + { + uint64 t; + + /* first "half" whitening, for v in 0 .. mask */ + key = dk_lcg(key); + if (v <= mask) + v ^= (key >> LCG_SHIFT) & mask; + + /* second (possibly overlapping) "half" whitening */ + key = dk_lcg(key); + t = size - 1 - v; + if (t <= mask) + { + t ^= (key >> LCG_SHIFT) & mask; + v = size - 1 - t; + } + + /* at most 2 primes are skipped for a given size */ + while (unlikely(size % primes[p] == 0)) + p = (p + 1) % PRP_PRIMES; + + /* scatter values with a prime multiplication */ + key = dk_lcg(key); + /* To avoid overflow in `primes[p] * v`, we check the number of + * digits of v and decide whether to use the modular_multiply function. + * Note that primes[p] is 24 bit. + */ + if ((v & UINT64CONST(0xffffffffff)) == v) + v = (primes[p] * v + (key >> LCG_SHIFT)) % size; + else + v = (modular_multiply(primes[p], v, size) + (key >> LCG_SHIFT)) % size; + } + + /* back to signed */ + return (int64) v; +} + /* * Initialize the given SimpleStats struct to all zeroes */ @@ -2420,6 +2668,27 @@ evalStandardFunc(TState *thread, CState *st, return true; } + case PGBENCH_PRPERM: + { + int64 val, size, seed; + + Assert(nargs == 3); + + if (!coerceToInt(&vargs[0], &val) || + !coerceToInt(&vargs[1], &size) || + !coerceToInt(&vargs[2], &seed)) + return false; + + if (size < 1) + { + fprintf(stderr, "pr_perm size parameter must be >= 1\n"); + return false; + } + + setIntValue(retval, pseudorandom_perm(val, size, seed)); + return true; + } + default: /* cannot get here */ Assert(0); diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index de50340..12d5c29 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -99,7 +99,8 @@ typedef enum PgBenchFunction PGBENCH_IS, PGBENCH_CASE, PGBENCH_HASH_FNV1A, - PGBENCH_HASH_MURMUR2 + PGBENCH_HASH_MURMUR2, + PGBENCH_PRPERM } PgBenchFunction; typedef struct PgBenchExpr PgBenchExpr; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index d972903..5fdf08f 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -323,6 +323,13 @@ pgbench( qr{command=98.: int 5432\b}, # :random_seed qr{command=99.: int -9223372036854775808\b}, # min int qr{command=100.: int 9223372036854775807\b}, # max int + qr{command=101.: boolean true\b}, + qr{command=102.: boolean true\b}, + qr{command=103.: boolean true\b}, + qr{command=104.: boolean true\b}, + qr{command=105.: boolean true\b}, + qr{command=109.: boolean true\b}, + qr{command=110.: boolean true\b}, ], 'pgbench expressions', { @@ -450,6 +457,28 @@ SELECT :v0, :v1, :v2, :v3; -- minint constant parsing \set min debug(-9223372036854775808) \set max debug(-(:min + 1)) +-- pseudo-random permutation +\set t debug(pr_perm(0, 2) + pr_perm(1, 2) = 1) +\set t debug(pr_perm(0, 3) + pr_perm(1, 3) + pr_perm(2, 3) = 3) +\set t debug(pr_perm(0, 4) + pr_perm(1, 4) + pr_perm(2, 4) + pr_perm(3, 4) = 6) +\set t debug(pr_perm(0, 5) + pr_perm(1, 5) + pr_perm(2, 5) + pr_perm(3, 5) + pr_perm(4, 5) = 10) +\set t debug(pr_perm(0, 16) + pr_perm(1, 16) + pr_perm(2, 16) + pr_perm(3, 16) + \ + pr_perm(4, 16) + pr_perm(5, 16) + pr_perm(6, 16) + pr_perm(7, 16) + \ + pr_perm(8, 16) + pr_perm(9, 16) + pr_perm(10, 16) + pr_perm(11, 16) + \ + pr_perm(12, 16) + pr_perm(13, 16) + pr_perm(14, 16) + pr_perm(15, 16) = 120) +-- random sanity check +\set size random(2, 1000) +\set v random(0, :size - 1) +\set p pr_perm(:v, :size) +\set t debug(0 <= :p and :p < :size and :p = pr_perm(:v + :size, :size) and :p <> pr_perm(:v + 1, :size)) +-- actual values +\set t debug(pr_perm(:v, 1) = 0) +\set t debug(pr_perm(0, 2, 5432) = 0 and pr_perm(1, 2, 5432) = 1 and \ + pr_perm(0, 2, 5431) = 1 and pr_perm(1, 2, 5431) = 0) +-- ~50 bits test to exercise full modular multiplication +\set t debug(pr_perm(0, 1000000000000000, 54321) = 204851330149484 and \ + pr_perm(1999999999999999, 1000000000000000, 54321) = 96596601529320 and \ + pr_perm(2500000000000000, 1000000000000000, 54321) = 108116869098154) } }); @@ -740,6 +769,10 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i); [ 'bad boolean', 0, [qr{malformed variable.*trueXXX}], q{\set b :badtrue or true} + ], + [ + 'invalid pr_perm size', 0, + [qr{pr_perm size parameter must be >= 1}], q{\set i pr_perm(0, 0)} ],); diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index 696c378..7c03ef2 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -306,6 +306,16 @@ my @script_tests = ( 'double overflow 3', [qr{double constant overflow}], { 'overflow-3.sql' => "\\set d .1E310\n" } + ], + [ + 'not enough arguments for pr_perm', + [qr{unexpected number of arguments \(pr_perm\)}], + { 'bad-pr_perm-1.sql' => "\\set i pr_perm(1)\n" } + ], + [ + 'too many arguments for pr_perm', + [qr{unexpected number of arguments \(pr_perm\)}], + { 'bad-pr_perm-2.sql' => "\\set i pr_perm(1, 2, 3, 4)\n" } ],); for my $t (@script_tests) diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 9798bd2..4cf3750 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -734,6 +734,9 @@ /* Define to 1 if your compiler understands __builtin_bswap64. */ #undef HAVE__BUILTIN_BSWAP64 +/* Define to 1 if your compiler understands __builtin_clzll. */ +#undef HAVE__BUILTIN_CLZLL + /* Define to 1 if your compiler understands __builtin_constant_p. */ #undef HAVE__BUILTIN_CONSTANT_P