示例#1
0
void	TransLut_FindIndexAvx2 <TransLut::MapperLog>::find_index (const TransLut::FloatIntMix val_arr [8], __m256i &index, __m256 &frac)
{
	assert (val_arr != 0);

	// Constants
	static const int      mant_size = 23;
	static const int      exp_bias  = 127;
	static const uint32_t base      = (exp_bias + LOGLUT_MIN_L2) << mant_size;
	static const float    val_min   = 1.0f / (int64_t (1) << -LOGLUT_MIN_L2);
//	static const float    val_max   = float (int64_t (1) << LOGLUT_MAX_L2);
	static const int      frac_size = mant_size - LOGLUT_RES_L2;
	static const uint32_t frac_mask = (1 << frac_size) - 1;

	const __m256   zero_f     = _mm256_setzero_ps ();
	const __m256   one_f      = _mm256_set1_ps (1);
	const __m256   frac_mul   = _mm256_set1_ps (1.0f / (1 << frac_size));
	const __m256   mul_eps    = _mm256_set1_ps (1.0f / val_min);
	const __m256   mask_abs_f = _mm256_load_ps (
		reinterpret_cast <const float *> (fstb::ToolsAvx2::_mask_abs)
	);

	const __m256i  zero_i          = _mm256_setzero_si256 ();
	const __m256i  mask_abs_epi32  = _mm256_set1_epi32 (0x7FFFFFFF);
	const __m256i  one_epi32       = _mm256_set1_epi32 (1);
	const __m256i  base_epi32      = _mm256_set1_epi32 (int (base));
	const __m256i  frac_mask_epi32 = _mm256_set1_epi32 (frac_mask);
	const __m256i  val_min_epi32   =
		_mm256_set1_epi32 ((LOGLUT_MIN_L2 + exp_bias) << mant_size);
	const __m256i  val_max_epi32   =
		_mm256_set1_epi32 ((LOGLUT_MAX_L2 + exp_bias) << mant_size);
	const __m256i  index_max_epi32 =
		_mm256_set1_epi32 ((LOGLUT_MAX_L2 - LOGLUT_MIN_L2) << LOGLUT_RES_L2);
	const __m256i  hsize_epi32     = _mm256_set1_epi32 (LOGLUT_HSIZE);
	const __m256i  mirror_epi32    = _mm256_set1_epi32 (LOGLUT_HSIZE - 1);

	// It really starts here
	const __m256   val_f = _mm256_load_ps (reinterpret_cast <const float *> (val_arr));
	const __m256   val_a = _mm256_and_ps (val_f, mask_abs_f);
	const __m256i  val_i = _mm256_load_si256 (reinterpret_cast <const __m256i *> (val_arr));
	const __m256i  val_u = _mm256_and_si256 (val_i, mask_abs_epi32);

	// Standard path
	__m256i        index_std = _mm256_sub_epi32 (val_u, base_epi32);
	index_std = _mm256_srli_epi32 (index_std, frac_size);
	index_std = _mm256_add_epi32 (index_std, one_epi32);
	__m256i        frac_stdi = _mm256_and_si256 (val_u, frac_mask_epi32);
	__m256         frac_std  = _mm256_cvtepi32_ps (frac_stdi);
	frac_std  = _mm256_mul_ps (frac_std, frac_mul);

	// Epsilon path
	__m256         frac_eps  = _mm256_max_ps (val_a, zero_f);
	frac_eps = _mm256_mul_ps (frac_eps, mul_eps);

	// Range cases
	const __m256i  eps_flag_i = _mm256_cmpgt_epi32 (val_min_epi32, val_u);
	const __m256i  std_flag_i = _mm256_cmpgt_epi32 (val_max_epi32, val_u);
	const __m256   eps_flag_f = _mm256_castsi256_ps (eps_flag_i);
	const __m256   std_flag_f = _mm256_castsi256_ps (std_flag_i);
	__m256i        index_tmp  =
		fstb::ToolsAvx2::select (std_flag_i, index_std, index_max_epi32);
	__m256         frac_tmp   =
		fstb::ToolsAvx2::select (std_flag_f, frac_std, one_f);
	index_tmp = fstb::ToolsAvx2::select (eps_flag_i, zero_i, index_tmp);
	frac_tmp  = fstb::ToolsAvx2::select (eps_flag_f, frac_eps, frac_tmp);

	// Sign cases
	const __m256i  neg_flag_i = _mm256_srai_epi32 (val_i, 31);
	const __m256   neg_flag_f = _mm256_castsi256_ps (neg_flag_i);
	const __m256i  index_neg  = _mm256_sub_epi32 (mirror_epi32, index_tmp);
	const __m256i  index_pos  = _mm256_add_epi32 (hsize_epi32, index_tmp);
	const __m256   frac_neg   = _mm256_sub_ps (one_f, frac_tmp);
	index = fstb::ToolsAvx2::select (neg_flag_i, index_neg, index_pos);
	frac  = fstb::ToolsAvx2::select (neg_flag_f, frac_neg, frac_tmp);
}
示例#2
0
__m256i test_mm256_cmpgt_epi32(__m256i a, __m256i b) {
  // CHECK-LABEL: test_mm256_cmpgt_epi32
  // CHECK: icmp sgt <8 x i32>
  return _mm256_cmpgt_epi32(a, b);
}
示例#3
0
void extern
avx2_test (void)
{
    x = _mm256_cmpgt_epi32 (x, x);
}