From c4f8bd137ab8fe498237e499e9384f3ea7dd45b1 Mon Sep 17 00:00:00 2001
From: Tomas Vondra
Date: Fri, 24 Apr 2020 22:28:46 +0200
Subject: [PATCH 4/4] try using murmurhash

---
Review notes (text in this section is ignored by git am):
- This patch was restored from a copy whose line breaks had been collapsed.
  The tab and blank-line layout of context lines is reconstructed, and the
  hunk line counts and diffstat were recomputed for the text below.  The
  original second hunk header (-3811,24 +3857,25) implies two more context
  lines than could be recovered, and the blob id on the "index" line
  predates these edits, so regenerate with git format-patch before applying.
- bloom_filter_check keeps setting args[0].isnull, as bloom_filter_add does.
- The seed is extracted with DatumGetUInt32() instead of an (int) cast.
- murmur3_32 uses /* */ comments and declares its loop counter with the
  other locals.

 src/backend/executor/execExprInterp.c | 90 ++++++++++++++++++++++++---
 1 file changed, 79 insertions(+), 11 deletions(-)

diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 48a63391f5..45e0954fde 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3776,28 +3776,92 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	*op->resnull = resultnull;
 }
 
+/*
+ * murmur_32_scramble
+ *		Mix one 32-bit input word before it is folded into the hash state.
+ */
+static inline uint32_t
+murmur_32_scramble(uint32_t k)
+{
+	k *= 0xcc9e2d51;
+	k = (k << 15) | (k >> 17);
+	k *= 0x1b873593;
+	return k;
+}
+
+/*
+ * murmur3_32
+ *		Hash "len" bytes starting at "key" into 32 bits, starting from "seed".
+ *
+ * Whole 4-byte groups are loaded in native byte order, so results differ
+ * between little- and big-endian machines.  Byte-swapping the loaded word
+ * would not change the properties of the hash, so no swap is done.
+ */
+static uint32_t
+murmur3_32(const uint8_t *key, size_t len, uint32_t seed)
+{
+	uint32_t	h = seed;
+	uint32_t	k;
+	size_t		i;
+
+	/* Consume the input in groups of 4 bytes. */
+	for (i = len >> 2; i; i--)
+	{
+		/* memcpy avoids unaligned and aliasing-unsafe loads. */
+		memcpy(&k, key, sizeof(uint32_t));
+		key += sizeof(uint32_t);
+		h ^= murmur_32_scramble(k);
+		h = (h << 13) | (h >> 19);
+		h = h * 5 + 0xe6546b64;
+	}
+
+	/*
+	 * Fold in the remaining 0-3 bytes.  They are assembled byte by byte with
+	 * the first byte in the low bits, so no byte swap is needed here.
+	 */
+	k = 0;
+	for (i = len & 3; i; i--)
+	{
+		k <<= 8;
+		k |= key[i - 1];
+	}
+	h ^= murmur_32_scramble(k);
+
+	/* Finalize: mix in the length, then scramble the bits. */
+	h ^= len;
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+	return h;
+}
+
 static void
 bloom_filter_add(PGFunction hash_fn_addr, FunctionCallInfo fcinfo,
 				 char *filter, int m, int k, Datum *seeds, Datum value)
 {
 	int			i;
+	uint64		h;
 
 	/* Call hash function */
 	fcinfo->args[0].value = value;
 	fcinfo->args[0].isnull = false;
 
+	fcinfo->args[1].isnull = false;
+	fcinfo->args[1].value = (Datum) 0;
+
+	/* Hash the value once; the k bit positions are derived from it below */
+	h = DatumGetUInt64(hash_fn_addr(fcinfo));
+
 	for (i = 0; i < k; i++)
 	{
-		uint64	h;
 		int		byteIdx;
 		int		bitIdx;
 
-		fcinfo->args[1].isnull = false;
-		fcinfo->args[1].value = seeds[i];
-
-		h = DatumGetUInt64(hash_fn_addr(fcinfo));
+		uint32	v = (uint32) murmur3_32((const unsigned char *) &h, sizeof(uint64), DatumGetUInt32(seeds[i]));
 
-		bitIdx = h % m;
+		bitIdx = v % m;
 		byteIdx = bitIdx / 8;
 		bitIdx = bitIdx % 8;
 
@@ -3811,22 +3875,26 @@ bloom_filter_check(PGFunction hash_fn_addr, FunctionCallInfo fcinfo,
 				   char *filter, int m, int k, Datum *seeds, Datum value)
 {
 	int			i;
+	uint64		h;
 
 	/* Call hash function */
 	fcinfo->args[0].value = value;
 	fcinfo->args[0].isnull = false;
 
+	fcinfo->args[1].isnull = false;
+	fcinfo->args[1].value = (Datum) 0;
+
+	/* Hash the value once; the k bit positions are derived from it below */
+	h = DatumGetUInt64(hash_fn_addr(fcinfo));
+
 	for (i = 0; i < k; i++)
 	{
-		uint64	h;
 		int		byteIdx;
 		int		bitIdx;
 
-		fcinfo->args[1].value = seeds[i];
-
-		h = DatumGetUInt64(hash_fn_addr(fcinfo));
+		uint32	v = (uint32) murmur3_32((const unsigned char *) &h, sizeof(uint64), DatumGetUInt32(seeds[i]));
 
-		bitIdx = h % m;
+		bitIdx = v % m;
 		byteIdx = bitIdx / 8;
 		bitIdx = bitIdx % 8;
 
-- 
2.21.1