Line data Source code
1 : /**
2 : Copyright (c) 2021-2022 Roman Katuntsev <sbkarr@stappler.org>
3 : Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
4 :
5 : Permission is hereby granted, free of charge, to any person obtaining a copy
6 : of this software and associated documentation files (the "Software"), to deal
7 : in the Software without restriction, including without limitation the rights
8 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 : copies of the Software, and to permit persons to whom the Software is
10 : furnished to do so, subject to the following conditions:
11 :
12 : The above copyright notice and this permission notice shall be included in
13 : all copies or substantial portions of the Software.
14 :
15 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 : THE SOFTWARE.
22 : **/
23 :
24 : #ifndef STAPPLER_CORE_STRING_SPHASH_H_
25 : #define STAPPLER_CORE_STRING_SPHASH_H_
26 :
27 : #include "SPCore.h"
28 :
29 : // Based on XXH (https://cyan4973.github.io/xxHash/#benchmarks)
30 : // constexpr implementation from https://github.com/ekpyron/xxhashct
31 :
32 : // Requires C++17
33 :
34 : namespace STAPPLER_VERSIONIZED stappler::hash {
35 :
36 : #define SP_HASH_INLINE [[gnu::always_inline]]
37 :
38 : class xxh32 {
39 : public:
40 568666 : static constexpr uint32_t hash (const char *input, uint32_t len, uint32_t seed) {
41 1705998 : return finalize((len >= 16
42 568666 : ? h16bytes(input, len, seed)
43 : : seed + PRIME5)
44 1137332 : + len, (input) + (len & ~0xF), len & 0xF);
45 : }
46 :
47 : private:
48 : static constexpr uint32_t PRIME1 = 0x9E3779B1U;
49 : static constexpr uint32_t PRIME2 = 0x85EBCA77U;
50 : static constexpr uint32_t PRIME3 = 0xC2B2AE3DU;
51 : static constexpr uint32_t PRIME4 = 0x27D4EB2FU;
52 : static constexpr uint32_t PRIME5 = 0x165667B1U;
53 :
54 : SP_HASH_INLINE static constexpr uint32_t rotl (uint32_t x, int r) {
55 246207 : return ((x << r) | (x >> (32 - r)));
56 : }
57 : SP_HASH_INLINE static constexpr uint32_t round (uint32_t acc, const uint32_t input) {
58 986048 : return rotl(acc + (input * PRIME2), 13) * PRIME1;
59 : }
60 : SP_HASH_INLINE static constexpr uint32_t avalanche_step (const uint32_t h, const int rshift, const uint32_t prime) {
61 568666 : return (h ^ (h >> rshift)) * prime;
62 : }
63 : SP_HASH_INLINE static constexpr uint32_t avalanche (const uint32_t h) {
64 1705998 : return avalanche_step(avalanche_step(avalanche_step(h, 15, PRIME2), 13, PRIME3), 16, 1);
65 : }
66 : SP_HASH_INLINE static constexpr uint32_t endian32 (const char *v) {
67 1706642 : return uint32_t(static_cast<uint8_t>(v[0])) | (uint32_t(static_cast<uint8_t>(v[1])) << 8)
68 967106 : | (uint32_t(static_cast<uint8_t>(v[2])) << 16) | (uint32_t(static_cast<uint8_t>(v[3])) << 24);
69 : }
70 : SP_HASH_INLINE static constexpr uint32_t fetch32 (const char *p, const uint32_t v) {
71 1972096 : return round(v, endian32(p));
72 : }
73 : SP_HASH_INLINE static constexpr uint32_t finalize (uint32_t h, const char *p, uint32_t len) {
74 1289260 : while (len >= 4) {
75 720594 : h = rotl(h + (endian32(p) * PRIME3), 17) * PRIME4;
76 720594 : len -= 4;
77 720594 : p += 4;
78 : }
79 1224197 : while (len > 0) {
80 655531 : h = rotl(h + (static_cast<uint8_t>(*p++) * PRIME5), 11) * PRIME1;
81 655531 : --len;
82 : }
83 568666 : return avalanche(h);
84 : }
85 : SP_HASH_INLINE static constexpr uint32_t h16bytes (const char *p, uint32_t len, uint32_t v1, uint32_t v2, uint32_t v3, uint32_t v4) {
86 246207 : const char * const limit = p + len - 16;
87 : do {
88 246512 : v1 = fetch32(p, v1); p += 4;
89 246512 : v2 = fetch32(p, v2); p += 4;
90 246512 : v3 = fetch32(p, v3); p += 4;
91 246512 : v4 = fetch32(p, v4); p += 4;
92 246512 : } while (p <= limit);
93 984828 : return rotl(v1, 1) + rotl(v2, 7) + rotl(v3, 12) + rotl(v4, 18);
94 : }
95 : SP_HASH_INLINE static constexpr uint32_t h16bytes (const char *p, uint32_t len, const uint32_t seed) {
96 492414 : return h16bytes(p, len, seed + PRIME1 + PRIME2, seed + PRIME2, seed, seed - PRIME1);
97 : }
98 : };
99 :
100 : class xxh64 {
101 : public:
102 54179 : static constexpr uint64_t hash (const char *p, uint64_t len, uint64_t seed) {
103 162537 : return finalize ((len >= 32
104 54179 : ? h32bytes (p, len, seed)
105 : : seed + PRIME5)
106 108358 : + len, p + (len & ~0x1F), len & 0x1F);
107 : }
108 :
109 : private:
110 : static constexpr uint64_t PRIME1 = 0x9E3779B185EBCA87ULL; /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
111 : static constexpr uint64_t PRIME2 = 0xC2B2AE3D27D4EB4FULL; /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
112 : static constexpr uint64_t PRIME3 = 0x165667B19E3779F9ULL; /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
113 : static constexpr uint64_t PRIME4 = 0x85EBCA77C2B2AE63ULL; /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
114 : static constexpr uint64_t PRIME5 = 0x27D4EB2F165667C5ULL; /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
115 :
116 : SP_HASH_INLINE static constexpr uint64_t rotl (uint64_t x, int r) {
117 53979 : return ((x << r) | (x >> (64 - r)));
118 : }
119 : SP_HASH_INLINE static constexpr uint64_t mix1 (const uint64_t h, const uint64_t prime, int rshift) {
120 54179 : return (h ^ (h >> rshift)) * prime;
121 : }
122 : SP_HASH_INLINE static constexpr uint64_t mix2 (const uint64_t p, const uint64_t v = 0) {
123 1023628 : return rotl (v + p * PRIME2, 31) * PRIME1;
124 : }
125 : SP_HASH_INLINE static constexpr uint64_t mix3 (const uint64_t h, const uint64_t v) {
126 215916 : return (h ^ mix2 (v)) * PRIME1 + PRIME4;
127 : }
128 : SP_HASH_INLINE static constexpr uint32_t endian32(const char *v) {
129 281 : return uint32_t(static_cast<uint8_t>(v[0])) | (uint32_t(static_cast<uint8_t>(v[1])) << 8)
130 281 : | (uint32_t(static_cast<uint8_t>(v[2])) << 16) | (uint32_t(static_cast<uint8_t>(v[3])) << 24);
131 : }
132 : SP_HASH_INLINE static constexpr uint64_t endian64 (const char *v) {
133 807712 : return uint64_t(static_cast<uint8_t>(v[0])) | (uint64_t(static_cast<uint8_t>(v[1])) << 8)
134 807712 : | (uint64_t(static_cast<uint8_t>(v[2])) << 16) | (uint64_t(static_cast<uint8_t>(v[3])) << 24)
135 807712 : | (uint64_t(static_cast<uint8_t>(v[4])) << 32) | (uint64_t(static_cast<uint8_t>(v[5])) << 40)
136 323587 : | (uint64_t(static_cast<uint8_t>(v[6])) << 48) | (uint64_t(static_cast<uint8_t>(v[7])) << 56);
137 : }
138 : SP_HASH_INLINE static constexpr uint64_t fetch64 (const char *p, const uint64_t v = 0) {
139 1615424 : return mix2 (endian64 (p), v);
140 : }
141 : SP_HASH_INLINE static constexpr uint64_t fetch32 (const char *p) {
142 281 : return uint64_t (endian32 (p)) * PRIME1;
143 : }
144 : SP_HASH_INLINE static constexpr uint64_t finalize (uint64_t h, const char * __restrict__ p, uint64_t len) {
145 216391 : while (len >= 8) {
146 162212 : h = rotl (h ^ fetch64 (p), 27) * PRIME1 + PRIME4;
147 162212 : p += 8;
148 162212 : len -= 8;
149 : }
150 54179 : if (len >= 4) {
151 281 : h = rotl (h ^ fetch32 (p), 23) * PRIME2 + PRIME3;
152 281 : p += 4;
153 281 : len -= 4;
154 : }
155 54479 : while (len > 0) {
156 300 : h = rotl (h ^ ((static_cast<uint8_t>(*p++)) * PRIME5), 11) * PRIME1;
157 300 : --len;
158 : }
159 162537 : return (mix1 (mix1 (mix1 (h, PRIME2, 33), PRIME3, 29), 1, 32));
160 : }
161 : SP_HASH_INLINE static constexpr uint64_t h32bytes(const char *__restrict__ p, uint64_t len, uint64_t v1, uint64_t v2, uint64_t v3, uint64_t v4) {
162 53979 : const char *const limit = p + len - 32;
163 : do {
164 161375 : v1 = fetch64(p, v1); p += 8;
165 161375 : v2 = fetch64(p, v2); p += 8;
166 161375 : v3 = fetch64(p, v3); p += 8;
167 161375 : v4 = fetch64(p, v4); p += 8;
168 161375 : } while (p <= limit);
169 431832 : return mix3(mix3(mix3(mix3(rotl(v1, 1) + rotl(v2, 7) + rotl(v3, 12) + rotl(v4, 18), v1), v2), v3), v4);
170 : }
171 : SP_HASH_INLINE static constexpr uint64_t h32bytes(const char *__restrict__ p, uint64_t len, const uint64_t seed) {
172 107958 : return h32bytes(p, len, seed + PRIME1 + PRIME2, seed + PRIME2, seed, seed - PRIME1);
173 : }
174 : };
175 :
176 568672 : inline constexpr uint32_t hash32(const char* str, uint32_t len, uint32_t seed = 0) {
177 568672 : return xxh32::hash(str, len, seed);
178 : }
179 :
180 53923 : inline constexpr uint64_t hash64(const char* str, size_t len, uint64_t seed = 0) {
181 53923 : return xxh64::hash(str, len, seed);
182 : }
183 :
184 216 : inline constexpr size_t hashSize(const char* str, size_t len, uint64_t seed = 0) {
185 : if constexpr (sizeof(size_t) == 4) {
186 : return xxh32::hash(str, len, seed);
187 : } else {
188 216 : return xxh64::hash(str, len, seed);
189 : }
190 : }
191 :
192 : }
193 :
194 : #endif /* LIBSTAPPLER_COMMON_STRING_SPHASH_H_ */
|