// Linear-mapper lookup for eight packed values at once (AVX2).
//
// val_arr : eight input values, read as packed floats with an aligned load
//           (_mm256_load_ps faults on addresses that are not 32-byte aligned).
// index   : receives, per lane, the converted position shifted by
//           -LINLUT_MIN_F * 2^LINLUT_RES_L2 and clamped to
//           [0, LINLUT_SIZE_F - 2].
// frac    : receives, per lane, the scaled input minus its converted integer
//           position. It is derived from the unshifted, unclamped position,
//           so clamping the index does not alter it.
void TransLut_FindIndexAvx2 <TransLut::MapperLin>::find_index (const TransLut::FloatIntMix val_arr [8], __m256i &index, __m256 &frac)
{
    assert (val_arr != 0);

    // Bring the inputs into table resolution: x * 2^LINLUT_RES_L2.
    const __m256   resolution = _mm256_set1_ps (1 << LINLUT_RES_L2);
    const __m256   input      = _mm256_load_ps (reinterpret_cast <const float *> (val_arr));
    const __m256   scaled     = _mm256_mul_ps (input, resolution);

    // Float -> int32 conversion. _mm256_cvtps_epi32 follows the current MXCSR
    // rounding mode.
    // NOTE(review): under the default round-to-nearest mode the remainder
    // written to frac can be negative -- confirm this is what callers expect.
    const __m256i  position   = _mm256_cvtps_epi32 (scaled);

    // Shift so that the table origin maps to slot 0, then clamp both ends.
    const __m256i  origin     = _mm256_set1_epi32 (-LINLUT_MIN_F * (1 << LINLUT_RES_L2));
    const __m256i  slot_last  = _mm256_set1_epi32 (LINLUT_SIZE_F - 2);
    const __m256i  slot_first = _mm256_setzero_si256 ();
    const __m256i  shifted    = _mm256_add_epi32 (position, origin);
    const __m256i  capped     = _mm256_min_epi32 (shifted, slot_last);

    index = _mm256_max_epi32 (capped, slot_first);
    frac  = _mm256_sub_ps (scaled, _mm256_cvtepi32_ps (position));
}
// Codegen test for _mm256_min_epi32 (per-lane minimum of signed 32-bit
// integers). The verification directive inside the body expects the emitted
// IR to reference the AVX2 pmins.d target intrinsic.
//
// The directive must stay on its own line: a `//` comment runs to the end of
// the physical line, so anything placed after it (including the return
// statement and the closing brace) would be commented out.
__m256i test_mm256_min_epi32(__m256i a, __m256i b) {
  // CHECK: @llvm.x86.avx2.pmins.d
  return _mm256_min_epi32(a, b);
}
// Codegen test for _mm256_min_epi32 (per-lane minimum of signed 32-bit
// integers). The verification directives inside the body expect the call to
// be emitted as generic IR: a signed less-than compare on <8 x i32> followed
// immediately by a select between the same two operands.
//
// Each directive must stay on its own line: a `//` comment runs to the end of
// the physical line, so anything placed after it (further directives, the
// return statement, the closing brace) would be commented out.
__m256i test_mm256_min_epi32(__m256i a, __m256i b) {
  // CHECK-LABEL: test_mm256_min_epi32
  // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]]
  return _mm256_min_epi32(a, b);
}