blob: 62df4c3ec7e75fcd3374b953395984f64175825c [file] [log] [blame]
Paul Miller7c0efea2018-11-13 23:49:001// Copyright 2018 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
6#define COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
7
8#include <cstdint>
Paul Miller7c0efea2018-11-13 23:49:009#include <vector>
10
11#include "base/compiler_specific.h"
Scott Violet69a5d8dd2021-06-01 23:46:4812#include "base/component_export.h"
Jan Wilken Dörriee7843132021-02-16 20:32:5913#include "base/strings/string_piece.h"
Paul Miller7c0efea2018-11-13 23:49:0014
15namespace variations {
16namespace internal {
17
18// Hash utilities for NormalizedMurmurHashEntropyProvider. For more info, see:
19// https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo
Scott Violet69a5d8dd2021-06-01 23:46:4820class COMPONENT_EXPORT(VARIATIONS) VariationsMurmurHash {
Paul Miller7c0efea2018-11-13 23:49:0021 public:
22 // Prepares data to be hashed by VariationsMurmurHash: align and zero-pad to a
23 // multiple of 4 bytes, and produce the same uint32_t values regardless of
24 // platform endianness. ("abcd" will always become 0x64636261). Any padding
25 // will appear in the more-significant bytes of the last uint32_t.
Jan Wilken Dörriee7843132021-02-16 20:32:5926 static std::vector<uint32_t> StringToLE32(base::StringPiece data);
Paul Miller7c0efea2018-11-13 23:49:0027
28 // Hash is a reimplementation of MurmurHash3_x86_32 from third_party/smhasher/
29 // which works on all architectures. MurmurHash3_x86_32 does unaligned reads
30 // (not generally safe on ARM) if the input bytes start on an unaligned
31 // address, and it assumes little-endianness. Hash produces the same result
32 // for the same input uint32_t values, regardless of platform endianness, and
33 // it produces the same results that MurmurHash3_x86_32 would produce on a
34 // little-endian platform.
35 //
Paul Miller43aa56212019-02-21 03:26:5836 // |length| is the number of bytes to hash. It mustn't exceed data.size() * 4.
37 // If length % 4 != 0, Hash will consume the less-significant bytes of the
38 // last uint32_t first.
Paul Miller7c0efea2018-11-13 23:49:0039 //
40 // MurmurHash3_x86_32 takes a seed, for which 0 is the typical value. Hash
41 // hard-codes the seed to 0, since NormalizedMurmurHashEntropyProvider doesn't
42 // use it.
43 static uint32_t Hash(const std::vector<uint32_t>& data, size_t length);
44
45 // A version of Hash which is specialized for exactly 2 bytes of data and
46 // allows a nonzero seed. NormalizedMurmurHashEntropyProvider calls this in a
47 // loop, |kMaxLowEntropySize| times per study, so it must be fast.
48 ALWAYS_INLINE static uint32_t Hash16(uint32_t seed, uint16_t data) {
49 uint32_t h1 = seed, k1 = data;
50
51 // tail
52 k1 *= c1;
53 k1 = RotateLeft(k1, 15);
54 k1 *= c2;
55 h1 ^= k1;
56
57 // finalization
58 h1 ^= 2;
59 h1 = FinalMix(h1);
60
61 return h1;
62 }
63
64 private:
65 static const uint32_t c1 = 0xcc9e2d51;
66 static const uint32_t c2 = 0x1b873593;
67
68 ALWAYS_INLINE static uint32_t RotateLeft(uint32_t x, int n) {
69 return (x << n) | (x >> (32 - n));
70 }
71
72 ALWAYS_INLINE static uint32_t FinalMix(uint32_t h) {
73 h ^= h >> 16;
74 h *= 0x85ebca6b;
75 h ^= h >> 13;
76 h *= 0xc2b2ae35;
77 h ^= h >> 16;
78 return h;
79 }
80};
81
82} // namespace internal
83} // namespace variations
84
85#endif // COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_