From c72fa746108ed853f0219b10e7368b6e833e5fc5 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sat, 2 Sep 2023 16:25:23 +0700 Subject: [PATCH v301 1/2] Use XOR for combining and do it before mixing Previously, we mixed first and then combined the input character via addition. This failed on a small set of OIDs, per report from Peter Eisentraut. --- src/tools/PerfectHash.pm | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tools/PerfectHash.pm b/src/tools/PerfectHash.pm index e54905a3ef..0d6826141f 100644 --- a/src/tools/PerfectHash.pm +++ b/src/tools/PerfectHash.pm @@ -144,8 +144,8 @@ sub generate_hash_function $f .= sprintf "\t\tunsigned char c = *k++"; $f .= sprintf " | 0x20" if $case_fold; # see comment below $f .= sprintf ";\n\n"; - $f .= sprintf "\t\ta = a * %d + c;\n", $hash_mult1; - $f .= sprintf "\t\tb = b * %d + c;\n", $hash_mult2; + $f .= sprintf "\t\ta = (a ^ c) * %d;\n", $hash_mult1; + $f .= sprintf "\t\tb = (b ^ c) * %d;\n", $hash_mult2; $f .= sprintf "\t}\n"; $f .= sprintf "\treturn h[a %% %d] + h[b %% %d];\n", $nhash, $nhash; $f .= sprintf "}\n"; @@ -171,7 +171,7 @@ sub _calc_hash { my $cn = ord($c); $cn |= 0x20 if $case_fold; - $result = ($result * $mult + $cn) % 4294967296; + $result = (($result ^ $cn) * $mult) % 4294967296; } return $result; } @@ -203,8 +203,8 @@ sub _construct_hash_table my $nverts = 2 * $nedges + 1; # number of vertices # However, it would be very bad if $nverts were exactly equal to either - # $hash_mult1 or $hash_mult2: effectively, that hash function would be - # sensitive to only the last byte of each key. Cases where $nverts is a + # $hash_mult1 or $hash_mult2: the corresponding hash function would + # always have a modulus of zero. Cases where $nverts is a # multiple of either multiplier likewise lose information. (But $nverts # can't actually divide them, if they've been intelligently chosen as # primes.) We can avoid such problems by adjusting the table size. -- 2.41.0