diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 3dd492c..c31254c 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -882,6 +882,11 @@ pgbench options d client_id unique number identifying the client session (starts from zero) + + + hash_seed + random seed used in hash functions by default + @@ -1246,6 +1251,27 @@ pgbench options d 5 + hash(a [, seed ] ) + integer + alias for hash_murmur2() + hash(10, 5432) + -5817877081768721676 + + + hash_fnv1a(a [, seed ] ) + integer + FNV hash + hash_fnv1a(10, 5432) + -7793829335365542153 + + + hash_murmur2(a [, seed ] ) + integer + murmur2 hash + hash_murmur2(10, 5432) + -5817877081768721676 + + int(x) integer cast to int @@ -1423,6 +1449,22 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / + Hash functions accepts an input value and an optional seed parameter. In case the seed isn't provided hash_seed value is used. Hash functions can be used, for example, to modify the distribution of random_zipfian or random_exponential functions in order to obtain scattered distribution. Thus the following pgbench script simulates possible real world workload typical for social media and blogging platforms where few accounts generate excessive load: + + +\set r random_zipfian(0, 100000000, 1.07) +\set k abs(hash(:r)) % 1000000 + + + In some cases several distinct distributions are needed which don't correlate with each other and this is when implicit seed parameter comes in handy: + + +\set k1 abs(hash(:r), :hash_seed + 123) % 1000000 +\set k2 abs(hash(:r), :hash_seed + 321) % 1000000 + + + + As an example, the full definition of the built-in TPC-B-like transaction is: diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index e23ca51..b5e150f 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -16,6 +16,10 @@ #include "pgbench.h" +#define PGBENCH_NARGS_VARIABLE (-1) +#define PGBENCH_NARGS_CASE (-2) +#define PGBENCH_NARGS_HASH (-3) + PgBenchExpr *expr_parse_result; static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list); @@ -226,9 +230,13 @@ make_uop(yyscan_t yyscanner, const char *operator, PgBenchExpr *expr) /* * List of available functions: * - fname: function name, "!..." for special internal functions - * - nargs: number of arguments - * -1 is a special value for least & greatest meaning #args >= 1 - * -2 is for the "CASE WHEN ..." function, which has #args >= 3 and odd + * - nargs: number of arguments. Special cases: + * - PGBENCH_NARGS_VARIABLE is a special value for least & greatest + * meaning #args >= 1; + * - PGBENCH_NARGS_CASE is for the "CASE WHEN ..." function, which + * has #args >= 3 and odd; + * - PGBENCH_NARGS_HASH is for hash functions, which have one required + * and one optional argument; * - tag: function identifier from PgBenchFunction enum */ static const struct @@ -259,10 +267,10 @@ static const struct "abs", 1, PGBENCH_ABS }, { - "least", -1, PGBENCH_LEAST + "least", PGBENCH_NARGS_VARIABLE, PGBENCH_LEAST }, { - "greatest", -1, PGBENCH_GREATEST + "greatest", PGBENCH_NARGS_VARIABLE, PGBENCH_GREATEST }, { "debug", 1, PGBENCH_DEBUG @@ -347,7 +355,25 @@ static const struct }, /* "case when ... then ... else ... end" construction */ { - "!case_end", -2, PGBENCH_CASE + "!case_end", PGBENCH_NARGS_CASE, PGBENCH_CASE + }, + { + "hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2 + }, + { + "hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2 + }, + { + "hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A + }, + { + "hash", -3, PGBENCH_HASH_MURMUR2 + }, + { + "hash_murmur2", -3, PGBENCH_HASH_MURMUR2 + }, + { + "hash_fnv1a", -3, PGBENCH_HASH_FNV1A }, /* keep as last array element */ { @@ -423,29 +449,49 @@ elist_length(PgBenchExprList *list) static PgBenchExpr * make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args) { + int len = elist_length(args); + PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr)); Assert(fnumber >= 0); - if (PGBENCH_FUNCTIONS[fnumber].nargs >= 0 && - PGBENCH_FUNCTIONS[fnumber].nargs != elist_length(args)) - expr_yyerror_more(yyscanner, "unexpected number of arguments", - PGBENCH_FUNCTIONS[fnumber].fname); - - /* check at least one arg for least & greatest */ - if (PGBENCH_FUNCTIONS[fnumber].nargs == -1 && - elist_length(args) == 0) - expr_yyerror_more(yyscanner, "at least one argument expected", - PGBENCH_FUNCTIONS[fnumber].fname); - /* special case: case (when ... then ...)+ (else ...)? end */ - if (PGBENCH_FUNCTIONS[fnumber].nargs == -2) - { - int len = elist_length(args); - - /* 'else' branch is always present, but could be a NULL-constant */ - if (len < 3 || len % 2 != 1) - expr_yyerror_more(yyscanner, "odd and >= 3 number of arguments expected", - "case control structure"); + /* validate arguments number including few special cases */ + switch (PGBENCH_FUNCTIONS[fnumber].nargs) + { + /* check at least one arg for least & greatest */ + case PGBENCH_NARGS_VARIABLE: + if (len == 0) + expr_yyerror_more(yyscanner, "at least one argument expected", + PGBENCH_FUNCTIONS[fnumber].fname); + break; + + /* case (when ... then ...)+ (else ...)? end */ + case PGBENCH_NARGS_CASE: + /* 'else' branch is always present, but could be a NULL-constant */ + if (len < 3 || len % 2 != 1) + expr_yyerror_more(yyscanner, + "odd and >= 3 number of arguments expected", + "case control structure"); + break; + + /* hash functions with optional seed argument */ + case PGBENCH_NARGS_HASH: + if (len > 2) + expr_yyerror_more(yyscanner, "unexpected number of arguments", + PGBENCH_FUNCTIONS[fnumber].fname); + + if (len == 1) + { + PgBenchExpr *var = make_variable("hash_seed"); + args = make_elist(var, args); + } + break; + + /* common case: positive arguments number */ + default: + if (PGBENCH_FUNCTIONS[fnumber].nargs != len) + expr_yyerror_more(yyscanner, "unexpected number of arguments", + PGBENCH_FUNCTIONS[fnumber].fname); } expr->etype = ENODE_FUNCTION; diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 31ea6ca..8e6e0ad 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -61,6 +61,14 @@ #define ERRCODE_UNDEFINED_TABLE "42P01" /* + * Hashing constants + */ +#define FNV_PRIME 0x100000001b3 +#define FNV_OFFSET_BASIS 0xcbf29ce484222325 +#define MM2_MUL 0xc6a4a7935bd1e995 +#define MM2_ROT 47 + +/* * Multi-platform pthread implementations */ @@ -915,6 +923,51 @@ getZipfianRand(TState *thread, int64 min, int64 max, double s) } /* + * FNV-1a hash function + */ +static int64 +getHashFnv1a(int64 val, uint64 seed) +{ + int64 result; + int i; + + result = FNV_OFFSET_BASIS ^ seed; + for (i = 0; i < 8; ++i) + { + int32 octet = val & 0xff; + + val = val >> 8; + result = result ^ octet; + result = result * FNV_PRIME; + } + + return result; +} + +/* + * Murmur2 hash function + */ +static int64 +getHashMurmur2(int64 val, uint64 seed) +{ + uint64 result = seed ^ (sizeof(int64) * MM2_MUL); + uint64 k = (uint64) val; + + k *= MM2_MUL; + k ^= k >> MM2_ROT; + k *= MM2_MUL; + + result ^= k; + result *= MM2_MUL; + + result ^= result >> MM2_ROT; + result *= MM2_MUL; + result ^= result >> MM2_ROT; + + return (int64) result; +} + +/* * Initialize the given SimpleStats struct to all zeroes */ static void @@ -2209,6 +2262,28 @@ evalStandardFunc( return true; } + /* hashing */ + case PGBENCH_HASH_FNV1A: + case PGBENCH_HASH_MURMUR2: + { + int64 val; + int64 seed; + int64 result; + + Assert(nargs == 2); + + if (!coerceToInt(&vargs[0], &val)) + return false; + + if (!coerceToInt(&vargs[1], &seed)) + return false; + + result = (func == PGBENCH_HASH_FNV1A) ? + getHashFnv1a(val, seed) : getHashMurmur2(val, seed); + setIntValue(retval, result); + return true; + } + default: /* cannot get here */ Assert(0); @@ -4972,6 +5047,10 @@ main(int argc, char **argv) exit(1); } + /* set random seed */ + INSTR_TIME_SET_CURRENT(start_time); + srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time)); + if (internal_script_used) { /* @@ -5033,6 +5112,21 @@ main(int argc, char **argv) } } + /* set default seed for hash functions */ + if (lookupVariable(&state[0], "hash_seed") == NULL) + { + uint64 seed; + + seed = (uint64) (random() & 0xFFFF) << 48; + seed |= (uint64) (random() & 0xFFFF) << 32; + seed |= (uint64) (random() & 0xFFFF) << 16; + seed |= (uint64) (random() & 0xFFFF); + + for (i = 0; i < nclients; i++) + if (!putVariableInt(&state[i], "startup", "hash_seed", (int64) seed)) + exit(1); + } + if (!is_no_vacuum) { fprintf(stderr, "starting vacuum..."); @@ -5050,10 +5144,6 @@ main(int argc, char **argv) } PQfinish(con); - /* set random seed */ - INSTR_TIME_SET_CURRENT(start_time); - srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time)); - /* set up thread data structures */ threads = (TState *) pg_malloc(sizeof(TState) * nthreads); nclients_dealt = 0; diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index 0705ccd..6983865 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -97,7 +97,9 @@ typedef enum PgBenchFunction PGBENCH_LE, PGBENCH_LT, PGBENCH_IS, - PGBENCH_CASE + PGBENCH_CASE, + PGBENCH_HASH_FNV1A, + PGBENCH_HASH_MURMUR2 } PgBenchFunction; typedef struct PgBenchExpr PgBenchExpr; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index a8b2962..752c299 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -259,6 +259,11 @@ pgbench( qr{command=46.: int 46\b}, qr{command=47.: boolean true\b}, qr{command=48.: boolean true\b}, + qr{command=49.: int -5817877081768721676\b}, + qr{command=50.: boolean true\b}, + qr{command=51.: int -7793829335365542153\b}, + qr{command=52.: int -?\d+\b}, + qr{command=53.: boolean true\b}, ], 'pgbench expressions', { '001_pgbench_expressions' => q{-- integer functions @@ -327,6 +332,12 @@ pgbench( \set n6 debug(:n IS NULL AND NOT :f AND :t) -- conditional truth \set cs debug(CASE WHEN 1 THEN TRUE END AND CASE WHEN 1.0 THEN TRUE END AND CASE WHEN :n THEN NULL ELSE TRUE END) +-- hash functions +\set h0 debug(hash(10, 5432)) +\set h1 debug(:h0 = hash_murmur2(10, 5432)) +\set h3 debug(hash_fnv1a(10, 5432)) +\set h4 debug(hash(10)) +\set h5 debug(hash(10) = hash(10, :hash_seed)) -- lazy evaluation \set zy 0 \set yz debug(case when :zy = 0 then -1 else (1 / :zy) end)