__m128i branchfree_search4_avx(int* source, size_t n, __m128i target) {
    __m128i offsets = _mm_setzero_si128();
    if(n == 0) return offsets;

    __m128i ha = _mm_set1_epi32(n>>1);
    while(n>1) {
        n -=  n>>1;
        __m128i offsetsplushalf = _mm_add_epi32(offsets,ha);
        ha = _mm_sub_epi32(ha,_mm_srli_epi32(ha,1));
        __m128i keys = _mm_i32gather_epi32(source,offsetsplushalf,4);
        __m128i lt = _mm_cmplt_epi32(keys,target);
        offsets = _mm_blendv_epi8(offsets,offsetsplushalf,lt);
    }
    __m128i lastkeys = _mm_i32gather_epi32(source,offsets,4);
    __m128i lastlt = _mm_cmplt_epi32(lastkeys,target);
    __m128i oneswhereneeded = _mm_srli_epi32(lastlt,31);
    __m128i  answer = _mm_add_epi32(offsets,oneswhereneeded);
    return answer;
}
示例#2
0
__m128i test_mm_i32gather_epi32(int const *b, __m128i c) {
  // CHECK-LABEL: test_mm_i32gather_epi32
  // CHECK: call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %{{.*}}, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 2)
  return _mm_i32gather_epi32(b, c, 2);
}
示例#3
0
__m128i test_mm_i32gather_epi32(int const *b, __m128i c) {
  // CHECK: @llvm.x86.avx2.gather.d.d
  return _mm_i32gather_epi32(b, c, 2);
}
示例#4
0
void extern
avx2_test (void)
{
  x = _mm_i32gather_epi32 (base, idx, 1);
}