/
searchSIMDTree.c
44 lines (41 loc) · 1.28 KB
/
searchSIMDTree.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <x86intrin.h>
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h> // SSSE3
#include <smmintrin.h> // SSE4.1
#include <nmmintrin.h> // SSE4.2
#include <ammintrin.h> // SSE4A
/**
 * Descend a linearized k-ary search tree, comparing four delimiters at a
 * time with SSE2.
 *
 * Layout read by this function: tree[level] is one flat array holding the
 * delimiters of every node on that level. A node with index p on a level of
 * fanout f owns the (f - 1) delimiters starting at tree[level][p * (f - 1)].
 * Inside a node the search stops at the first delimiter that is >= value, so
 * delimiters are expected to be sorted in ascending order (not checked here).
 *
 * The slot chosen inside a node (0 .. f-1; f-1 when value is greater than
 * every delimiter) selects the child, and the child's node index on the next
 * level is p * f + slot.
 *
 * Level arrays do not need to be 16-byte aligned and the fanout does not
 * need to be of the form 4k+1: delimiters are fetched with unaligned loads
 * and any delimiters left over after the last full group of four are
 * compared one by one.
 *
 * @param tree    per-level delimiter arrays, tree[0] being the root level
 * @param fanout  fanout[level] is the number of children per node on a level
 * @param levels  number of levels to descend
 * @param value   key to search for
 * @return the node index reached after the last level (0 if levels <= 0)
 */
int searchSIMDTree(int32_t **tree, int *fanout, int levels, int32_t value) {
    /* Same key in all four 32-bit lanes. */
    const __m128i key = _mm_set1_epi32(value);
    int node = 0;

    for (int level = 0; level < levels; level++) {
        const int f = fanout[level];
        const int ndelim = f - 1;           /* delimiters per node        */
        const int base = node * ndelim;     /* first delimiter of `node`  */
        const int32_t *keys = tree[level];
        int slot = 0;
        int found = 0;

        /* Full groups of four delimiters. */
        while (slot + 4 <= ndelim) {
            /* Unaligned load: base is a multiple of 16 bytes only when the
             * array is aligned and ndelim is a multiple of 4. */
            const __m128i delims =
                _mm_loadu_si128((const __m128i *)&keys[base + slot]);
            const __m128i gt = _mm_cmpgt_epi32(key, delims);
            /* Bit i set <=> value <= delimiter i of this group. */
            const int le_mask = _mm_movemask_ps(_mm_castsi128_ps(gt)) ^ 0x0F;

            if (le_mask) {
                slot += _bit_scan_forward(le_mask);
                found = 1;
                break;
            }
            slot += 4;
        }

        /* Remaining (ndelim % 4) delimiters, compared one at a time. */
        if (!found) {
            while (slot < ndelim && value > keys[base + slot]) {
                slot++;
            }
        }

        node = node * f + slot;
    }

    return node;
}