inline
 mask_type operator>=(const short_vec<float, 32>& other) const
 {
     return
         (_mm512_cmp_ps_mask(val1, other.val1, _CMP_GE_OS) <<  0) +
         (_mm512_cmp_ps_mask(val2, other.val2, _CMP_GE_OS) << 16);
 }
Exemplo n.º 2
0
// exp()
inline mic_m512_t mic_exp_ps(mic_m512_t x) {
	x = _mm512_min_ps(x, _ps_exp_hi);
	x = _mm512_max_ps(x, _ps_exp_lo);

	mic_m512_t temp_2 = _mm512_fmadd_ps(x, _ps_cephes_LOG2EF, _ps_0point5);

	mic_m512_t temp_1 = _mm512_round_ps(temp_2, _MM_FROUND_TO_NEAREST_INT, _MM_EXPADJ_NONE);
	mic_m512_t temp_3 = _mm512_sub_ps(temp_1, temp_2);
	__mmask16 mask = _mm512_cmp_ps_mask(temp_3, _ps_0, _MM_CMPINT_GT);

	temp_2 = _mm512_mask_sub_ps(temp_1, mask, temp_1, _ps_1);
	__m512i emm0 = _mm512_cvtfxpnt_round_adjustps_epi32(temp_2, _MM_FROUND_TO_NEAREST_INT, _MM_EXPADJ_NONE);

	x = _mm512_fnmadd_ps(temp_2, _ps_cephes_exp_C12, x);

	mic_m512_t x2 = _mm512_mul_ps(x, x);
	mic_m512_t x3 = _mm512_mul_ps(x2, x);
	mic_m512_t x4 = _mm512_mul_ps(x2, x2);
 
	temp_1 = _mm512_add_ps(x, _ps_1);
	temp_1 = _mm512_fmadd_ps(x2, _ps_cephes_exp_p5, temp_1);
	temp_1 = _mm512_fmadd_ps(x3, _ps_cephes_exp_p4, temp_1);

	temp_2 = _mm512_mul_ps(x3, _ps_cephes_exp_p0);
	temp_3 = _mm512_mul_ps(x2, _ps_cephes_exp_p1);

	mic_m512_t temp_4 = _mm512_mul_ps(x, _ps_cephes_exp_p2);

	emm0 = _mm512_add_epi32(emm0, _pi32_0x7f);

	temp_2 = _mm512_add_ps(temp_2, temp_3);
	temp_3 = _mm512_add_ps(temp_3, temp_4);
	temp_2 = _mm512_add_ps(temp_2, temp_3);

	emm0 = _mm512_slli_epi32(emm0, 23);
	mic_m512_t pow2n = _mm512_castsi512_ps(emm0);

	temp_2 = _mm512_mul_ps(temp_2, x4);

	mic_m512_t y = _mm512_add_ps(temp_1, temp_2);

	y = _mm512_mul_ps(y, pow2n);
	return y;
} // newexp_ps()
Exemplo n.º 3
0
 static batch_bool_type isnan(const batch_type& x)
 {
     return _mm512_cmp_ps_mask(x, x, _CMP_UNORD_Q);
 }
Exemplo n.º 4
0
 static batch_bool_type lte(const batch_type& lhs, const batch_type& rhs)
 {
     return _mm512_cmp_ps_mask(lhs, rhs, _CMP_LE_OQ);
 }
Exemplo n.º 5
0
void
test8bit (void)
{
  m512i = _mm512_permutex_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_permutex_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_permutex_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_ternarylogic_epi64 (m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_ternarylogic_epi64 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_ternarylogic_epi64 (mmask8, m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_ternarylogic_epi32 (m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_ternarylogic_epi32 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_ternarylogic_epi32 (mmask16, m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_shuffle_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_shuffle_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_shuffle_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_shuffle_i64x2 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_shuffle_i64x2 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_shuffle_i64x2 (mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_shuffle_i32x4 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_shuffle_i32x4 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_shuffle_i32x4 (mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_shuffle_f64x2 (m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_shuffle_f64x2 (m512d, mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_shuffle_f64x2 (mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512  = _mm512_shuffle_f32x4 (m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_mask_shuffle_f32x4 (m512, mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_maskz_shuffle_f32x4 (mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_permutex_pd (m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_permutex_pd (m512d, mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_permutex_pd (mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_permute_pd (m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_permute_pd (m512d, mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_permute_pd (mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512  = _mm512_permute_ps (m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_mask_permute_ps (m512, mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_maskz_permute_ps (mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_shuffle_pd (m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_shuffle_pd (m512d, mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_shuffle_pd (mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512  = _mm512_shuffle_ps (m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_mask_shuffle_ps (m512, mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_maskz_shuffle_ps (mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_fixupimm_pd (m512d, m512d, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_fixupimm_pd (m512d, mmask8, m512d, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_fixupimm_pd (mmask8, m512d, m512d, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m512  = _mm512_fixupimm_ps (m512, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512  = _mm512_mask_fixupimm_ps (m512, mmask16, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512  = _mm512_maskz_fixupimm_ps (mmask16, m512, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m128d = _mm_fixupimm_sd (m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m128d = _mm_mask_fixupimm_sd (m128d, mmask8, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m128d = _mm_maskz_fixupimm_sd (mmask8, m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m128  = _mm_fixupimm_ss (m128, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m128  = _mm_mask_fixupimm_ss (m128, mmask8, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m128  = _mm_maskz_fixupimm_ss (mmask8, m128, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m512i = _mm512_rol_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_rol_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_rol_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_ror_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_ror_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_ror_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_rol_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_rol_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_rol_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_ror_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_ror_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_ror_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m256i = _mm512_cvtps_ph (m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m256i = _mm512_mask_cvtps_ph (m256i, mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m256i = _mm512_maskz_cvtps_ph (mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_roundscale_pd (m512d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_roundscale_pd (m512d, mmask8, m512d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_roundscale_pd (mmask8, m512d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m512  = _mm512_roundscale_ps (m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512  = _mm512_mask_roundscale_ps (m512, mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512  = _mm512_maskz_roundscale_ps (mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m128d = _mm_roundscale_sd (m128d, m128d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m128  = _mm_roundscale_ss (m128, m128, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m512i = _mm512_alignr_epi32 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_alignr_epi32 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_alignr_epi32 (mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_alignr_epi64 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_alignr_epi64 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_alignr_epi64 (mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  mmask8 = _mm512_cmp_epi64_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_cmp_epi32_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_cmp_epu64_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_cmp_epu32_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_cmp_pd_mask (m512d, m512d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm512_cmp_ps_mask (m512, m512, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm512_mask_cmp_epi64_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_mask_cmp_epi32_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_mask_cmp_epu64_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_mask_cmp_epu32_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_mask_cmp_pd_mask (2, m512d, m512d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm512_mask_cmp_ps_mask (2, m512, m512, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm_cmp_sd_mask (m128d, m128d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm_cmp_ss_mask (m128, m128, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm_mask_cmp_sd_mask (1, m128d, m128d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm_mask_cmp_ss_mask (1, m128, m128, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */

}
Exemplo n.º 6
0
void AVX512BW_mandelbrot(
	float Re_min, float Re_max,
	float Im_min, float Im_max,
	float threshold,
	int maxiters,
	int width, int height,
	uint8_t *data)

{
	float dRe, dIm;
	int x, y;

	__m128i* ptr = (__m128i*)data;

	// step on Re and Im axis
	dRe = (Re_max - Re_min)/width;
	dIm = (Im_max - Im_min)/height;

	// prepare vectors
	// 1. threshold
    const __m512 vec_threshold = _mm512_set1_ps(threshold);

	// 2. Cim
    __m512 Cim = _mm512_set1_ps(Im_min);

	// 3. Re advance every x iteration
    const __m512 vec_dRe = _mm512_set1_ps(16*dRe);

	// 4. Im advance every y iteration
    const __m512 vec_dIm = _mm512_set1_ps(dIm);

	// calculations
	for (y=0; y < height; y++) {

        __m512 Cre = _mm512_setr_ps(
            Re_min +  0*dRe, Re_min +  1*dRe, Re_min +  2*dRe, Re_min +  3*dRe,
            Re_min +  4*dRe, Re_min +  5*dRe, Re_min +  6*dRe, Re_min +  7*dRe,
            Re_min +  8*dRe, Re_min +  9*dRe, Re_min + 10*dRe, Re_min + 11*dRe,
            Re_min + 12*dRe, Re_min + 13*dRe, Re_min + 14*dRe, Re_min + 15*dRe
        );

		for (x=0; x < width; x+=16) {

            __m512 Xre = _mm512_setzero_ps();
            __m512 Xim = _mm512_setzero_ps();

            __m128i itercount = _mm_setzero_si128();

            int i;
            for (i=0; i < maxiters; i++) {

			    // Tre = Xre^2 - Xim^2 + Cim
                const __m512 Xre2 = _mm512_mul_ps(Xre, Xre);
                const __m512 Xim2 = _mm512_mul_ps(Xim, Xim);
                const __m512 Tre  = _mm512_add_ps(Cre, _mm512_sub_ps(Xre2, Xim2));

			    // Tim = 2*Xre*Xim + Cre
                const __m512 t1  = _mm512_mul_ps(Xre, Xim);
                const __m512 Tim = _mm512_add_ps(Cim, _mm512_add_ps(t1, t1));

                // sqr_dist = Tre^2 + Tim^2
                __m512 Tre2 = _mm512_mul_ps(Tre, Tre);
                __m512 Tim2 = _mm512_mul_ps(Tim, Tim);
                __m512 sqr_dist = _mm512_add_ps(Tre2, Tim2);

                // sqr_dist < threshold => 16-bit mask
                __mmask16 mask = _mm512_cmp_ps_mask(sqr_dist, vec_threshold, _CMP_LE_OS);
                if (mask == 0) {
                    break;
                }

                // Note: unlike SSE/AVX2 versions itercount is a packed byte vector,
                //       thus conversion packed dword -> byte is not needed.
                itercount = _mm_sub_epi8(itercount, _mm_movm_epi8(mask));

                Xre = Tre;
                Xim = Tim;

            } // for

            *ptr++ = itercount;

			// advance Cre vector
            Cre = _mm512_add_ps(Cre, vec_dRe);
		}

		// advance Cim vector
        Cim = _mm512_add_ps(Cim, vec_dIm);
	}
}