From a407ee7138c597403be5e0521416e4bf5a9c2e46 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sat, 2 Sep 2023 16:07:53 +0700 Subject: [PATCH v301 2/2] Use powers of two when choosing multipliers for perfect hash functions --- src/tools/PerfectHash.pm | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/tools/PerfectHash.pm b/src/tools/PerfectHash.pm index 0d6826141f..dc5a9b4427 100644 --- a/src/tools/PerfectHash.pm +++ b/src/tools/PerfectHash.pm @@ -77,8 +77,10 @@ sub generate_hash_function $case_fold = $options{case_fold} || 0; # Try different hash function parameters until we find a set that works - # for these keys. The multipliers are chosen to be primes that are cheap - # to calculate via shift-and-add, so don't change them without care. + # for these keys. The multipliers are chosen to be numbers that are cheap + # to calculate, so don't change them without care. + # Currently they are either powers-of-two (which reduce to a shift), + # or adjacent primes (which reduce to shift-and-add). # (Commonly, random seeds are tried, but we want reproducible results # from this program so we don't do that.) my $hash_mult1 = 257; @@ -89,12 +91,12 @@ sub generate_hash_function FIND_PARAMS: for ($hash_seed1 = 0; $hash_seed1 < 10; $hash_seed1++) { - for ($hash_seed2 = 0; $hash_seed2 < 10; $hash_seed2++) { - foreach (17, 31, 127, 8191) + foreach (16, 32, 64, 128, 512, 1024, 2048, 4096, 8192, 17, 31, 127, 8191) { $hash_mult2 = $_; # "foreach $hash_mult2" doesn't work + @subresult = _construct_hash_table( $keys_ref, $hash_mult1, $hash_mult2, $hash_seed1, $hash_seed2); @@ -205,11 +207,12 @@ sub _construct_hash_table # However, it would be very bad if $nverts were exactly equal to either # $hash_mult1 or $hash_mult2: the corresponding hash function would # always have a modulus of zero. Cases where $nverts is a - # multiple of either multiplier likewise lose information. (But $nverts - # can't actually divide them, if they've been intelligently chosen as - # primes.) We can avoid such problems by adjusting the table size. + # multiple or divisor of either multiplier likewise lose information. + # We can avoid such problems by adjusting the table size. while ($nverts % $hash_mult1 == 0 - || $nverts % $hash_mult2 == 0) + || $nverts % $hash_mult2 == 0 + || $hash_mult1 % $nverts == 0 + || $hash_mult2 % $nverts == 0) { $nverts++; } -- 2.41.0