tlx
siphash.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * tlx/siphash.hpp
3  *
4  * SipHash Implementations borrowed under Public Domain license from
5  * https://github.com/floodyberry/siphash
6  *
7  * Part of tlx - http://panthema.net/tlx
8  *
9  * Copyright (C) 2017 Timo Bingmann <tb@panthema.net>
10  *
11  * All rights reserved. Published under the Boost Software License, Version 1.0
12  ******************************************************************************/
13 
14 #ifndef TLX_SIPHASH_HEADER
15 #define TLX_SIPHASH_HEADER
16 
18 #include <tlx/math/bswap_le.hpp>
19 #include <tlx/math/rol.hpp>
20 
21 #include <cstdint>
22 #include <cstdlib>
23 #include <string>
24 
25 #if defined(_MSC_VER)
26 
27 #include <intrin.h>
28 
29 #if (_MSC_VER > 1200) || defined(_mm_free)
30 #define __SSE2__
31 #endif
32 
33 #endif // !defined(_MSC_VER)
34 
35 #if defined(__SSE2__)
36 #include <emmintrin.h>
37 #endif
38 
39 namespace tlx {
40 
41 static inline
42 uint64_t siphash_plain(const uint8_t key[16], const uint8_t* m, size_t len) {
43 
44  uint64_t v0, v1, v2, v3;
45  uint64_t mi, k0, k1;
46  uint64_t last7;
47  size_t i, blocks;
48 
49  k0 = bswap64_le(*reinterpret_cast<const uint64_t*>(key + 0));
50  k1 = bswap64_le(*reinterpret_cast<const uint64_t*>(key + 8));
51  v0 = k0 ^ 0x736f6d6570736575ull;
52  v1 = k1 ^ 0x646f72616e646f6dull;
53  v2 = k0 ^ 0x6c7967656e657261ull;
54  v3 = k1 ^ 0x7465646279746573ull;
55 
56  last7 = static_cast<uint64_t>(len & 0xff) << 56;
57 
58 #define TLX_SIPCOMPRESS() \
59  v0 += v1; v2 += v3; \
60  v1 = rol64(v1, 13); \
61  v3 = rol64(v3, 16); \
62  v1 ^= v0; v3 ^= v2; \
63  v0 = rol64(v0, 32); \
64  v2 += v1; v0 += v3; \
65  v1 = rol64(v1, 17); \
66  v3 = rol64(v3, 21); \
67  v1 ^= v2; v3 ^= v0; \
68  v2 = rol64(v2, 32);
69 
70  for (i = 0, blocks = (len & ~7); i < blocks; i += 8) {
71  mi = bswap64_le(*reinterpret_cast<const uint64_t*>(m + i));
72  v3 ^= mi;
75  v0 ^= mi;
76  }
77 
78  switch (len - blocks) {
79  case 7:
80  last7 |= static_cast<uint64_t>(m[i + 6]) << 48;
82  case 6:
83  last7 |= static_cast<uint64_t>(m[i + 5]) << 40;
85  case 5:
86  last7 |= static_cast<uint64_t>(m[i + 4]) << 32;
88  case 4:
89  last7 |= static_cast<uint64_t>(m[i + 3]) << 24;
91  case 3:
92  last7 |= static_cast<uint64_t>(m[i + 2]) << 16;
94  case 2:
95  last7 |= static_cast<uint64_t>(m[i + 1]) << 8;
97  case 1:
98  last7 |= static_cast<uint64_t>(m[i + 0]);
100  case 0:
101  default:;
102  }
103 
104  v3 ^= last7;
105  TLX_SIPCOMPRESS();
106  TLX_SIPCOMPRESS();
107  v0 ^= last7;
108  v2 ^= 0xff;
109  TLX_SIPCOMPRESS();
110  TLX_SIPCOMPRESS();
111  TLX_SIPCOMPRESS();
112  TLX_SIPCOMPRESS();
113 
114 #undef TLX_SIPCOMPRESS
115 
116  return v0 ^ v1 ^ v2 ^ v3;
117 }
118 
119 /******************************************************************************/
120 // SSE2 vectorization
121 
122 #if defined(__SSE2__)
123 
124 union siphash_packedelem64 {
125  uint64_t u[2];
126  __m128i v;
127 };
128 
129 /* 0,2,1,3 */
130 static const siphash_packedelem64 siphash_init[2] = {
131  {
132  { 0x736f6d6570736575ull, 0x6c7967656e657261ull }
133  },
134  {
135  { 0x646f72616e646f6dull, 0x7465646279746573ull }
136  }
137 };
138 
139 static const siphash_packedelem64 siphash_final = {
140  { 0x0000000000000000ull, 0x00000000000000ffull }
141 };
142 
143 static inline
144 uint64_t siphash_sse2(const uint8_t key[16], const uint8_t* m, size_t len) {
145 
146  __m128i k, v02, v20, v13, v11, v33, mi;
147  uint64_t last7;
148  uint32_t lo, hi;
149  size_t i, blocks;
150 
151  k = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key + 0));
152  v02 = siphash_init[0].v;
153  v13 = siphash_init[1].v;
154  v02 = _mm_xor_si128(v02, _mm_unpacklo_epi64(k, k));
155  v13 = _mm_xor_si128(v13, _mm_unpackhi_epi64(k, k));
156 
157  last7 = static_cast<uint64_t>(len & 0xff) << 56;
158 
159 #define TLX_SIPCOMPRESS() \
160  v11 = v13; \
161  v33 = _mm_shuffle_epi32(v13, _MM_SHUFFLE(1, 0, 3, 2)); \
162  v11 = _mm_or_si128(_mm_slli_epi64(v11, 13), _mm_srli_epi64(v11, 64 - 13)); \
163  v02 = _mm_add_epi64(v02, v13); \
164  v33 = _mm_shufflelo_epi16(v33, _MM_SHUFFLE(2, 1, 0, 3)); \
165  v13 = _mm_unpacklo_epi64(v11, v33); \
166  v13 = _mm_xor_si128(v13, v02); \
167  v20 = _mm_shuffle_epi32(v02, _MM_SHUFFLE(0, 1, 3, 2)); \
168  v11 = v13; \
169  v33 = _mm_shuffle_epi32(v13, _MM_SHUFFLE(1, 0, 3, 2)); \
170  v11 = _mm_or_si128(_mm_slli_epi64(v11, 17), _mm_srli_epi64(v11, 64 - 17)); \
171  v20 = _mm_add_epi64(v20, v13); \
172  v33 = _mm_or_si128(_mm_slli_epi64(v33, 21), _mm_srli_epi64(v33, 64 - 21)); \
173  v13 = _mm_unpacklo_epi64(v11, v33); \
174  v02 = _mm_shuffle_epi32(v20, _MM_SHUFFLE(0, 1, 3, 2)); \
175  v13 = _mm_xor_si128(v13, v20);
176 
177  for (i = 0, blocks = (len & ~7); i < blocks; i += 8) {
178  mi = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(m + i));
179  v13 = _mm_xor_si128(v13, _mm_slli_si128(mi, 8));
180  TLX_SIPCOMPRESS();
181  TLX_SIPCOMPRESS();
182  v02 = _mm_xor_si128(v02, mi);
183  }
184 
185  switch (len - blocks) {
186  case 7:
187  last7 |= static_cast<uint64_t>(m[i + 6]) << 48;
189  case 6:
190  last7 |= static_cast<uint64_t>(m[i + 5]) << 40;
192  case 5:
193  last7 |= static_cast<uint64_t>(m[i + 4]) << 32;
195  case 4:
196  last7 |= static_cast<uint64_t>(m[i + 3]) << 24;
198  case 3:
199  last7 |= static_cast<uint64_t>(m[i + 2]) << 16;
201  case 2:
202  last7 |= static_cast<uint64_t>(m[i + 1]) << 8;
204  case 1:
205  last7 |= static_cast<uint64_t>(m[i + 0]);
207  case 0:
208  default:;
209  }
210 
211  mi = _mm_unpacklo_epi32(
212  _mm_cvtsi32_si128(static_cast<uint32_t>(last7)),
213  _mm_cvtsi32_si128(static_cast<uint32_t>(last7 >> 32)));
214  v13 = _mm_xor_si128(v13, _mm_slli_si128(mi, 8));
215  TLX_SIPCOMPRESS();
216  TLX_SIPCOMPRESS();
217  v02 = _mm_xor_si128(v02, mi);
218  v02 = _mm_xor_si128(v02, siphash_final.v);
219  TLX_SIPCOMPRESS();
220  TLX_SIPCOMPRESS();
221  TLX_SIPCOMPRESS();
222  TLX_SIPCOMPRESS();
223 
224  v02 = _mm_xor_si128(v02, v13);
225  v02 = _mm_xor_si128(v02, _mm_shuffle_epi32(v02, _MM_SHUFFLE(1, 0, 3, 2)));
226  lo = _mm_cvtsi128_si32(v02);
227  hi = _mm_cvtsi128_si32(_mm_srli_si128(v02, 4));
228 
229 #undef TLX_SIPCOMPRESS
230 
231  return (static_cast<uint64_t>(hi) << 32) | lo;
232 }
233 
234 #endif // defined(__SSE2__)
235 
236 /******************************************************************************/
237 // Switch between available implementations
238 
239 static inline
240 uint64_t siphash(const uint8_t key[16], const uint8_t* msg, size_t size) {
241 #if defined(__SSE2__)
242  return siphash_sse2(key, msg, size);
243 #else
244  return siphash_plain(key, msg, size);
245 #endif
246 }
247 
248 static inline
249 uint64_t siphash(const uint8_t* msg, size_t size) {
250  const unsigned char key[16] = {
251  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
252  };
253  return siphash(key, msg, size);
254 }
255 
256 static inline
257 uint64_t siphash(const char* msg, size_t size) {
258  return siphash(reinterpret_cast<const uint8_t*>(msg), size);
259 }
260 
261 static inline
262 uint64_t siphash(const std::string& str) {
263  return siphash(str.data(), str.size());
264 }
265 
266 template <typename Type>
267 static inline
268 uint64_t siphash(const Type& value) {
269  return siphash(reinterpret_cast<const uint8_t*>(&value), sizeof(value));
270 }
271 
272 #undef rol64
273 
274 } // namespace tlx
275 
276 #endif // !TLX_SIPHASH_HEADER
277 
278 /******************************************************************************/
#define TLX_ATTRIBUTE_FALLTHROUGH
#define TLX_SIPCOMPRESS()
static uint64_t siphash(const uint8_t key[16], const uint8_t *msg, size_t size)
Definition: siphash.hpp:240
static uint64_t siphash_plain(const uint8_t key[16], const uint8_t *m, size_t len)
Definition: siphash.hpp:42