/*
 * SipHash-1-3 over m[0..len-1] with a 128-bit key -- SSE2 lane-paired
 * variant: state words v0/v2 live in the two 64-bit lanes of v02 and
 * v1/v3 in v13 (v3 in the high lane, mirroring the scalar `v3 ^= mi`).
 *
 * NOTE(review): depends on `xmmi`, `siphash_init[]`, `siphash_final`
 * and a vector `sipcompress()` macro defined earlier in this file
 * (outside this chunk); v20/v11/v33 are presumably scratch registers
 * used by that macro -- confirm against its definition.
 */
uint64_t siphash13(const unsigned char key[16], const unsigned char *m, size_t len) {
	xmmi k,v02,v20,v13,v11,v33,mi;
	uint64_t last7;
	uint32_t lo, hi;
	size_t i, blocks;

	/* xor k0 into both lanes of v02 and k1 into both lanes of v13 */
	k = _mm_loadu_si128((xmmi *)(key + 0));
	v02 = siphash_init[0].v;
	v13 = siphash_init[1].v;
	v02 = _mm_xor_si128(v02, _mm_unpacklo_epi64(k, k));
	v13 = _mm_xor_si128(v13, _mm_unpackhi_epi64(k, k));

	/* the final block carries (len mod 256) in its most significant byte */
	last7 = (uint64_t)(len & 0xff) << 56;

	/* absorb each whole 8-byte word: v3 ^= mi, one compression call, v0 ^= mi */
	for (i = 0, blocks = (len & ~7); i < blocks; i += 8) {
		mi = _mm_loadl_epi64((xmmi *)(m + i));
		v13 = _mm_xor_si128(v13, _mm_slli_si128(mi, 8)); /* into v3 (high lane) */
		sipcompress()
		v02 = _mm_xor_si128(v02, mi); /* into v0 (low lane) */
	}

	/* gather the 0..7 trailing bytes, little-endian -- intentional fallthrough */
	switch (len - blocks) {
	case 7: last7 |= (uint64_t)m[i + 6] << 48; /* fallthrough */
	case 6: last7 |= (uint64_t)m[i + 5] << 40; /* fallthrough */
	case 5: last7 |= (uint64_t)m[i + 4] << 32; /* fallthrough */
	case 4: last7 |= (uint64_t)m[i + 3] << 24; /* fallthrough */
	case 3: last7 |= (uint64_t)m[i + 2] << 16; /* fallthrough */
	case 2: last7 |= (uint64_t)m[i + 1] << 8; /* fallthrough */
	case 1: last7 |= (uint64_t)m[i + 0] ; /* fallthrough */
	case 0:
	default:;
	};

	/* rebuild last7 in the low 64-bit lane and absorb it like a block */
	mi = _mm_unpacklo_epi32(_mm_cvtsi32_si128((uint32_t)last7),_mm_cvtsi32_si128((uint32_t)(last7 >> 32)));
	v13 = _mm_xor_si128(v13, _mm_slli_si128(mi, 8));
	sipcompress()
	v02 = _mm_xor_si128(v02, mi);

	/* finalization: xor in siphash_final (presumably 0xff into the v2 lane --
	   confirm against its definition), then three more rounds */
	v02 = _mm_xor_si128(v02, siphash_final.v);
	sipcompress()
	sipcompress()
	sipcompress()

	/* fold v0 ^ v1 ^ v2 ^ v3 down to 64 bits and extract the two halves */
	v02 = _mm_xor_si128(v02, v13);
	v02 = _mm_xor_si128(v02, _mm_shuffle_epi32(v02, _MM_SHUFFLE(1,0,3,2)));
	lo = _mm_cvtsi128_si32(v02);
	hi = _mm_cvtsi128_si32(_mm_srli_si128(v02, 4));
	return ((uint64_t)hi << 32) | lo;
}
/*
 * SipHash-2-4 over m[0..len-1] with a 128-bit key: two compression
 * rounds per 8-byte message word, four finalization rounds.  Returns
 * the 64-bit tag.  Also defines the scalar sipcompress() round macro
 * (one SipRound) used again by siphash13() below.
 */
uint64_t siphash(const unsigned char key[16], const unsigned char *m, size_t len) {
	uint64_t v0, v1, v2, v3; /* names are fixed: sipcompress() expands to them */
	uint64_t k0, k1, w, tail;
	size_t off, full, r;

	/* key the state: constants are "somepseudorandomlygeneratedbytes" */
	k0 = U8TO64_LE(key + 0);
	k1 = U8TO64_LE(key + 8);
	v0 = k0 ^ 0x736f6d6570736575ull;
	v1 = k1 ^ 0x646f72616e646f6dull;
	v2 = k0 ^ 0x6c7967656e657261ull;
	v3 = k1 ^ 0x7465646279746573ull;

	/* the final block carries (len mod 256) in its most significant byte */
	tail = (uint64_t)(len & 0xff) << 56;

#define sipcompress() \
	v0 += v1; v2 += v3; \
	v1 = ROTL64(v1,13); v3 = ROTL64(v3,16); \
	v1 ^= v0; v3 ^= v2; \
	v0 = ROTL64(v0,32); \
	v2 += v1; v0 += v3; \
	v1 = ROTL64(v1,17); v3 = ROTL64(v3,21); \
	v1 ^= v2; v3 ^= v0; \
	v2 = ROTL64(v2,32);

	/* absorb every whole little-endian 8-byte word with two c-rounds */
	full = len & ~(size_t)7;
	for (off = 0; off < full; off += 8) {
		w = U8TO64_LE(m + off);
		v3 ^= w;
		sipcompress()
		sipcompress()
		v0 ^= w;
	}

	/* fold the 0..7 leftover bytes into the length block, high byte first */
	for (r = len - full; r > 0; ) {
		r--;
		tail |= (uint64_t)m[off + r] << (8 * r);
	}

	/* absorb the final (length + remainder) block */
	v3 ^= tail;
	sipcompress()
	sipcompress()
	v0 ^= tail;

	/* finalization: xor 0xff into v2, then four d-rounds */
	v2 ^= 0xff;
	sipcompress()
	sipcompress()
	sipcompress()
	sipcompress()
	return v0 ^ v1 ^ v2 ^ v3;
}
/* The 64bit 1-3 variant */ uint64_t siphash13(const unsigned char key[16], const unsigned char *m, size_t len) { uint64_t v0, v1, v2, v3; uint64_t mi, k0, k1; uint64_t last7; size_t i, blocks; k0 = U8TO64_LE(key + 0); k1 = U8TO64_LE(key + 8); v0 = k0 ^ 0x736f6d6570736575ull; v1 = k1 ^ 0x646f72616e646f6dull; v2 = k0 ^ 0x6c7967656e657261ull; v3 = k1 ^ 0x7465646279746573ull; last7 = (uint64_t)(len & 0xff) << 56; for (i = 0, blocks = (len & ~7); i < blocks; i += 8) { mi = U8TO64_LE(m + i); v3 ^= mi; sipcompress() /* 1 c round */ v0 ^= mi; } switch (len - blocks) { case 7: last7 |= (uint64_t)m[i + 6] << 48; case 6: last7 |= (uint64_t)m[i + 5] << 40; case 5: last7 |= (uint64_t)m[i + 4] << 32; case 4: last7 |= (uint64_t)m[i + 3] << 24; case 3: last7 |= (uint64_t)m[i + 2] << 16; case 2: last7 |= (uint64_t)m[i + 1] << 8; case 1: last7 |= (uint64_t)m[i + 0] ; case 0: default:; }; v3 ^= last7; sipcompress() /* 1 more c round */ v0 ^= last7; v2 ^= 0xff; sipcompress() /* and 3 final d rounds */ sipcompress() sipcompress() return v0 ^ v1 ^ v2 ^ v3; #undef sipcompress }