diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 88cf8b3..31ef39b 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -917,7 +917,7 @@ pgbench options d
default_seed
- seed used in hash functions by default
+ seed used in hash and pseudo-random permutation functions by default
@@ -1371,6 +1371,13 @@ pgbench options d
1024.0
+ pr_perm(i, size [, seed ] )
+ integer
+ pseudo-random permutation in [0,size)
+ pr_perm(0, 4)
+ 0, 1, 2 or 3
+
+
random(lb, ub)
integer
uniformly-distributed random integer in [lb, ub]
@@ -1532,6 +1539,21 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
+ Function pr_perm implements a pseudo-random permutation.
+ It permutes integers in [0, size) using a seed by applying rounds of
+ simple invertible functions, similarly to an encryption function,
+ although beware that it is not at all cryptographically secure.
+ It can be used to scramble the output of non-uniform random functions
+ so that the most frequently drawn values are not correlated.
+ Values outside the interval are interpreted modulo the size.
+ The function errors if size is not positive.
+ If no seed is provided, :default_seed is used.
+ Compared to hash functions discussed above, the function
+ ensures that a perfect permutation is applied: there are no collisions
+ nor holes in the output values.
+
+
+
As an example, the full definition of the built-in TPC-B-like
transaction is:
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index f7c56cc..762a629 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -19,6 +19,7 @@
#define PGBENCH_NARGS_VARIABLE (-1)
#define PGBENCH_NARGS_CASE (-2)
#define PGBENCH_NARGS_HASH (-3)
+#define PGBENCH_NARGS_PRPERM (-4)
PgBenchExpr *expr_parse_result;
@@ -366,6 +367,9 @@ static const struct
{
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
},
+ {
+ "pr_perm", PGBENCH_NARGS_PRPERM, PGBENCH_PRPERM
+ },
/* keep as last array element */
{
NULL, 0, 0
@@ -478,6 +482,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args)
}
break;
+ /* pseudo-random permutation function with optional seed argument */
+ case PGBENCH_NARGS_PRPERM:
+ if (len < 2 || len > 3)
+ expr_yyerror_more(yyscanner, "unexpected number of arguments",
+ PGBENCH_FUNCTIONS[fnumber].fname);
+
+ if (len == 2)
+ {
+ PgBenchExpr *var = make_variable("default_seed");
+ args = make_elist(var, args);
+ }
+ break;
+
/* common case: positive arguments number */
default:
Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0);
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 41b756c..763bf6f 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -986,6 +986,215 @@ getHashMurmur2(int64 val, uint64 seed)
return (int64) result;
}
+/* pseudo-random permutation */
+
+/*
+ * Number of entries in the primes table below.  It is 16 so that
+ * "% PRP_PRIMES" can be optimized to "& 0x0f".
+ */
+#define PRP_PRIMES 16
+
+/*
+ * 27-29 bits mega primes from https://primes.utm.edu/lists/small/millions/
+ *
+ * One entry is selected on each round of the permutation and used as the
+ * multiplier of the "scatter" step.  The table is only ever read, hence
+ * it is declared const.
+ */
+static const int64 primes[PRP_PRIMES] = {
+	INT64CONST(122949829),
+	INT64CONST(141650963),
+	INT64CONST(160481219),
+	INT64CONST(179424691),
+	INT64CONST(198491329),
+	INT64CONST(217645199),
+	INT64CONST(236887699),
+	INT64CONST(256203221),
+	INT64CONST(275604547),
+	INT64CONST(295075153),
+	INT64CONST(314606891),
+	INT64CONST(334214467),
+	INT64CONST(353868019),
+	INT64CONST(373587911),
+	INT64CONST(393342743),
+	INT64CONST(413158523)
+};
+
+/* how many "encryption" rounds to apply */
+#define PRP_ROUNDS 4
+
+/*
+ * Return the largest all-ones bit mask (a value of the form 2^k - 1)
+ * that lies in 0 .. n-1.  The result is 0 when n is 0 or 1.
+ */
+static uint64
+compute_prp_mask(uint64 n)
+{
+	int			shift;
+
+	/* copy the highest set bit of n into every lower bit position */
+	for (shift = 1; shift < 64; shift *= 2)
+		n |= n >> shift;
+
+	/* dropping one bit yields (largest power of 2 <= n) - 1 */
+	return n >> 1;
+}
+
+/*
+ * Calculate (x * y) % m, where x and y in [0, 2^64), m in [1, 2^64).
+ *
+ * If both operands, once reduced modulo m, fit in 32 bits, the product
+ * fits in 64 bits and is computed directly.  Otherwise the interleaved
+ * (shift-and-add) modular multiplication algorithm [1] is used:
+ *
+ *	Input: X, Y, M; 0 <= X, Y < M
+ *	Output: R = X * Y mod M
+ *	bits: number of significant bits of Y
+ *	Y[i]: i-th bit of Y
+ *
+ *	1. R = 0;
+ *	2. for (i = bits - 1; i >= 0; i--) {
+ *	3.     R = (2 * R) mod M;
+ *	4.     if (Y[i] == 0x1)
+ *	5.         R = (R + X) mod M;
+ *	   }
+ *
+ * R < M and X < M hold before each of steps 3 and 5, so each "mod M"
+ * needs at most one subtraction of M.  To stay within 64 bits for any M,
+ * the test "a + b >= M" is written as "a >= M - b", which cannot wrap
+ * around because b < M.
+ *
+ * Reference
+ * [1] D.N. Amanor, et al, "Efficient hardware architecture for
+ *     modular multiplication on FPGAs", in Field Programmable
+ *     Logic and Applications, 2005. International Conference on,
+ *     Aug 2005, pp. 539-542.
+ */
+static uint64
+modular_multiplicate(uint64 x, uint64 y, const uint64 m)
+{
+	int			i,
+				bits;
+	uint64		r = 0;
+
+	Assert(1 <= m);
+
+	/* Because (x * y) % m = ((x % m) * (y % m)) % m */
+	if (x >= m)
+		x %= m;
+	if (y >= m)
+		y %= m;
+
+	/* Return the trivial result. */
+	if (x == 0 || y == 0 || m == 1)
+		return 0;
+
+	/* Both operands fit in 32 bits: x * y cannot overflow 64 bits. */
+	if ((x | y) <= 0xffffffff)
+		return (x * y) % m;
+
+	/* Loop over the bits of the smaller operand to shorten the loop. */
+	if (x < y)
+	{
+		uint64		tmp = x;
+
+		x = y;
+		y = tmp;
+	}
+
+	/*
+	 * Find the number of significant bits of y, so that leading zero bits
+	 * are not iterated over.  y is not zero here, hence bits ends up >= 1.
+	 */
+	bits = 64;
+	while (bits > 1 && ((y >> (bits - 1)) & 0x1) == 0)
+		bits--;
+
+	for (i = bits - 1; i >= 0; i--)
+	{
+		/* r = (2 * r) % m, knowing that r < m */
+		r = (r >= m - r) ? r - (m - r) : r + r;
+
+		/* r = (r + x) % m, knowing that r < m and x < m */
+		if ((y >> i) & 0x1)
+			r = (r >= m - x) ? r - (m - x) : r + x;
+	}
+
+	return r;
+}
+
+/*
+ * Donald Knuth linear congruential generator: multiplier and increment
+ * used to advance the round key (key = key * DK_LCG_MUL + DK_LCG_INC).
+ */
+#define DK_LCG_MUL INT64CONST(6364136223846793005)
+#define DK_LCG_INC INT64CONST(1442695040888963407)
+
+/*
+ * Do not use all small bits: the key is shifted right by this many bits
+ * before being used, which discards its lowest bits.
+ */
+#define LCG_SHIFT 13
+
+/*
+ * PRP: parametric pseudo-random permutation
+ *
+ * data:  value to permute.  Values outside [0, isize) are first reduced
+ *        modulo isize; negative values wrap around as well, so that the
+ *        function is periodic with period isize over all int64 inputs.
+ * isize: size of the permuted interval, must be >= 1.
+ * seed:  selects which permutation is applied.
+ *
+ * Result in [0, isize) is a permutation for inputs in the same set.
+ *
+ * Note that this function does not pass statistical tests: eg
+ * permutations of 2, 3, 4 or 5 ints are not strictly equiprobable.
+ * However it is inexpensive compared to an actual encryption function,
+ * and the quality is good enough to avoid trivial correlations on
+ * large sizes, which is the expected use case.
+ *
+ * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE.
+ * PLEASE DO NOT USE FOR SUCH PURPOSE.
+ */
+static int64
+pseudorandom_perm(const int64 data, const int64 isize, const int64 seed)
+{
+	/* computations are performed on unsigned values */
+	uint64		key = (uint64) seed;
+	uint64		size;
+	uint64		v;
+	uint64		mask;
+	int64		rem;
+	unsigned int i,
+				p;
+
+	/* check the size before it is used as a divisor */
+	Assert(isize >= 1);
+
+	/* nothing to permute */
+	if (isize == 1)
+		return 0;
+
+	size = (uint64) isize;
+
+	/*
+	 * Reduce data into [0, size).  The C remainder has the sign of the
+	 * dividend, so a negative remainder is shifted back into range; simply
+	 * casting a negative value to unsigned would not give a modulo unless
+	 * size is a power of two.
+	 */
+	rem = data % isize;
+	if (rem < 0)
+		rem += isize;
+	v = (uint64) rem;
+
+	/* size-1: ensures 2 possibly overlapping halves */
+	mask = compute_prp_mask(size - 1);
+
+	/*
+	 * Apply PRP_ROUNDS rounds of bijective transformations:
+	 * (1) scramble: partial xors on power-of-2 subsets
+	 * (2) scatter: linear modulo
+	 */
+	for (i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % PRP_PRIMES)
+	{
+		uint64		t;
+
+		/* first "half" whitening, for v in 0 .. mask */
+		key = key * DK_LCG_MUL + DK_LCG_INC;
+		if (v <= mask)
+			v ^= (key >> LCG_SHIFT) & mask;
+
+		/*
+		 * second (possibly overlapping) "half" whitening: same operation
+		 * applied to the values counted from the upper end of the interval
+		 */
+		key = key * DK_LCG_MUL + DK_LCG_INC;
+		t = size - 1 - v;
+		if (t <= mask)
+		{
+			t ^= (key >> LCG_SHIFT) & mask;
+			v = size - 1 - t;
+		}
+
+		/*
+		 * The multiplier must not divide size, otherwise the multiplication
+		 * below would not be invertible.  At most 2 primes are skipped for
+		 * a given size.
+		 */
+		while (unlikely(size % primes[p] == 0))
+			p = (p + 1) % PRP_PRIMES;
+
+		/* scatter values with a prime multiplication and an offset */
+		key = key * DK_LCG_MUL + DK_LCG_INC;
+		v = (modular_multiplicate((uint64) primes[p], v, size) + (key >> LCG_SHIFT)) % size;
+	}
+
+	/* back to signed */
+	return (int64) v;
+}
+
/*
* Initialize the given SimpleStats struct to all zeroes
*/
@@ -2319,6 +2528,26 @@ evalStandardFunc(TState *thread, CState *st,
return true;
}
+ case PGBENCH_PRPERM:
+ {
+ int64 val, size, seed;
+ Assert(nargs == 3);
+
+ if (!coerceToInt(&vargs[0], &val) ||
+ !coerceToInt(&vargs[1], &size) ||
+ !coerceToInt(&vargs[2], &seed))
+ return false;
+
+ if (size < 1)
+ {
+ fprintf(stderr, "pr_perm size parameter must be >= 1\n");
+ return false;
+ }
+
+ setIntValue(retval, pseudorandom_perm(val, size, seed));
+ return true;
+ }
+
default:
/* cannot get here */
Assert(0);
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index 6983865..665c450 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -99,7 +99,8 @@ typedef enum PgBenchFunction
PGBENCH_IS,
PGBENCH_CASE,
PGBENCH_HASH_FNV1A,
- PGBENCH_HASH_MURMUR2
+ PGBENCH_HASH_MURMUR2,
+ PGBENCH_PRPERM
} PgBenchFunction;
typedef struct PgBenchExpr PgBenchExpr;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 2fc021d..0aec384 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -322,6 +322,14 @@ pgbench(
qr{command=96.: int 1\b}, # :scale
qr{command=97.: int 0\b}, # :client_id
qr{command=98.: int 5432\b}, # :random_seed
+ qr{command=99.: boolean true\b},
+ qr{command=100.: boolean true\b},
+ qr{command=101.: boolean true\b},
+ qr{command=102.: boolean true\b},
+ qr{command=103.: boolean true\b},
+ qr{command=107.: boolean true\b},
+ qr{command=108.: boolean true\b},
+ qr{command=109.: boolean true\b},
],
'pgbench expressions',
{
@@ -447,6 +455,24 @@ SELECT :v0, :v1, :v2, :v3;
\set sc debug(:scale)
\set ci debug(:client_id)
\set rs debug(:random_seed)
+-- pseudo-random permutation
+\set t debug(pr_perm(0, 2) + pr_perm(1, 2) = 1)
+\set t debug(pr_perm(0, 3) + pr_perm(1, 3) + pr_perm(2, 3) = 3)
+\set t debug(pr_perm(0, 4) + pr_perm(1, 4) + pr_perm(2, 4) + pr_perm(3, 4) = 6)
+\set t debug(pr_perm(0, 5) + pr_perm(1, 5) + pr_perm(2, 5) + pr_perm(3, 5) + pr_perm(4, 5) = 10)
+\set t debug(pr_perm(0, 16) + pr_perm(1, 16) + pr_perm(2, 16) + pr_perm(3, 16) + \
+ pr_perm(4, 16) + pr_perm(5, 16) + pr_perm(6, 16) + pr_perm(7, 16) + \
+ pr_perm(8, 16) + pr_perm(9, 16) + pr_perm(10, 16) + pr_perm(11, 16) + \
+ pr_perm(12, 16) + pr_perm(13, 16) + pr_perm(14, 16) + pr_perm(15, 16) = 120)
+-- random sanity check
+\set size random(2, 1000)
+\set v random(0, :size - 1)
+\set p pr_perm(:v, :size)
+\set t debug(0 <= :p and :p < :size and :p = pr_perm(:v + :size, :size) and :p <> pr_perm(:v + 1, :size))
+-- actual values
+\set t debug(pr_perm(:v, 1) = 0)
+\set t debug(pr_perm(0, 2, 5432) = 0 and pr_perm(1, 2, 5432) = 1 and \
+ pr_perm(0, 2, 5431) = 1 and pr_perm(1, 2, 5431) = 0)
}
});
@@ -731,6 +757,10 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i);
[
'bad boolean', 0,
[qr{malformed variable.*trueXXX}], q{\set b :badtrue or true}
+ ],
+ [
+ 'invalid pr_perm size', 0,
+ [qr{pr_perm size parameter must be >= 1}], q{\set i pr_perm(0, 0)}
],);
diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index c1c2c1e..ff02cfb 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -290,6 +290,16 @@ my @script_tests = (
'too many arguments for hash',
[qr{unexpected number of arguments \(hash\)}],
{ 'bad-hash-2.sql' => "\\set i hash(1,2,3)\n" }
+ ],
+ [
+ 'not enough arguments for pr_perm',
+ [qr{unexpected number of arguments \(pr_perm\)}],
+ { 'bad-pr_perm-1.sql' => "\\set i pr_perm(1)\n" }
+ ],
+ [
+ 'too many arguments for pr_perm',
+ [qr{unexpected number of arguments \(pr_perm\)}],
+ { 'bad-pr_perm-2.sql' => "\\set i pr_perm(1, 2, 3, 4)\n" }
],);
for my $t (@script_tests)