inline void mic_sincos_ps(mic_m512_t x, mic_m512_t *s, mic_m512_t *c) {
	__m512i sign_bit = _mm512_and_epi32(_mm512_castps_si512(x), _pi32_sign_mask);
	x = _mm512_castsi512_ps(_mm512_and_epi32(_mm512_castps_si512(x), _pi32_inv_sign_mask));

	mic_m512_t y = _mm512_mul_ps(x, _ps_cephes_FOPI);

	__m512i emm2 = _mm512_cvtfxpnt_round_adjustps_epi32(y, _MM_FROUND_TO_NEAREST_INT, _MM_EXPADJ_NONE);
	emm2 = _mm512_add_epi32(emm2, _pi32_1);
	emm2 = _mm512_and_epi32(emm2, _pi32_inv1);
	y = _mm512_cvtfxpnt_round_adjustepu32_ps(emm2, _MM_FROUND_TO_NEAREST_INT, _MM_EXPADJ_NONE);

	__m512i cos_emm2 = _mm512_sub_epi32(emm2, _pi32_2);

	__m512i emm0 = _mm512_and_epi32(emm2, _pi32_4);
	__m512i cos_emm0 = _mm512_andnot_epi32(cos_emm2, _pi32_4);
	emm0 = _mm512_slli_epi32(emm0, 29);
	cos_emm0 = _mm512_slli_epi32(cos_emm0, 29);
	sign_bit = _mm512_xor_epi32(sign_bit, emm0);

	emm2 = _mm512_and_epi32(emm2, _pi32_2);
	cos_emm2 = _mm512_and_epi32(cos_emm2, _pi32_2);
	__mmask16 mask = _mm512_cmp_epi32_mask(emm2, _pi32_0, _MM_CMPINT_EQ);
	emm2 = _mm512_mask_add_epi32(_pi32_0, mask, _pi32_ffff, _pi32_0);
	__mmask16 cos_mask = _mm512_cmp_epi32_mask(cos_emm2, _pi32_0, _MM_CMPINT_EQ);
	cos_emm2 = _mm512_mask_add_epi32(_pi32_0, cos_mask, _pi32_ffff, _pi32_0);
	
	x = _mm512_fmadd_ps(y, _ps_minus_cephes_DP123, x);

	mic_m512_t x2 = _mm512_mul_ps(x, x);
	mic_m512_t x3 = _mm512_mul_ps(x2, x);
	mic_m512_t x4 = _mm512_mul_ps(x2, x2);

	y = _mm512_fmadd_ps(_ps_coscof_p0, x2, _ps_coscof_p1);
	y = _mm512_fmadd_ps(y, x2, _ps_coscof_p2);
	mic_m512_t temp_2 = _mm512_fmsub_ps(x2, _ps_0point5, _ps_1);
	y = _mm512_fmsub_ps(y, x4, temp_2);
	mic_m512_t y2 = _mm512_fmadd_ps(_ps_sincof_p0, x2, _ps_sincof_p1);
	y2 = _mm512_fmadd_ps(y2, x2, _ps_sincof_p2);
	y2 = _mm512_fmadd_ps(y2, x3, x);

	mic_m512_t cos_y = y;
	mic_m512_t cos_y2 = y2;

	y = _mm512_castsi512_ps(_mm512_andnot_epi32(emm2, _mm512_castps_si512(y)));
	cos_y = _mm512_castsi512_ps(_mm512_andnot_epi32(cos_emm2, _mm512_castps_si512(cos_y)));
	y2 = _mm512_castsi512_ps(_mm512_and_epi32(emm2, _mm512_castps_si512(y2)));
	cos_y2 = _mm512_castsi512_ps(_mm512_and_epi32(cos_emm2, _mm512_castps_si512(cos_y2)));

	y = _mm512_add_ps(y, y2);
	cos_y = _mm512_add_ps(cos_y, cos_y2);

	*s = _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(y), sign_bit));
	*c = _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(cos_y), cos_emm0));
} // sincos_ps()
// sin()
static inline mic_m512_t mic_sin_ps(mic_m512_t x) {
	__m512i sign_bit;
	sign_bit = _mm512_and_epi32(_mm512_castps_si512(x), _pi32_sign_mask);
	x = _mm512_castsi512_ps(_mm512_and_epi32(_mm512_castps_si512(x), _pi32_inv_sign_mask));
	
	mic_m512_t y = _mm512_mul_ps(x, _ps_cephes_FOPI);

	__m512i emm2 = _mm512_cvtfxpnt_round_adjustps_epi32(y, _MM_FROUND_TO_NEAREST_INT, _MM_EXPADJ_NONE);
	emm2 = _mm512_add_epi32(emm2, _pi32_1);
	emm2 = _mm512_and_epi32(emm2, _pi32_inv1);
	y = _mm512_cvtfxpnt_round_adjustepu32_ps(emm2, _MM_FROUND_TO_NEAREST_INT, _MM_EXPADJ_NONE);

	__m512i emm0 = _mm512_and_epi32(emm2, _pi32_4);
	emm0 = _mm512_slli_epi32(emm0, 29);

	emm2 = _mm512_and_epi32(emm2, _pi32_2);
	__mmask16 mask = _mm512_cmp_epi32_mask(emm2, _pi32_0, _MM_CMPINT_EQ);
	emm2 = _mm512_mask_add_epi32(_pi32_0, mask, _pi32_ffff, _pi32_0);
	
	sign_bit = _mm512_xor_epi32(sign_bit, emm0);
	
	mic_m512_t temp = _ps_minus_cephes_DP123;
	temp = _mm512_mul_ps(y, temp);
	x = _mm512_add_ps(x, temp);

	mic_m512_t x2 = _mm512_mul_ps(x, x);
	mic_m512_t x3 = _mm512_mul_ps(x2, x);
	mic_m512_t x4 = _mm512_mul_ps(x2, x2);

	y = _mm512_mul_ps(_ps_coscof_p0, x2);
	mic_m512_t y2 = _mm512_mul_ps(_ps_sincof_p0, x2);
	y = _mm512_add_ps(y, _ps_coscof_p1);
	y2 = _mm512_add_ps(y2, _ps_sincof_p1);
	y = _mm512_mul_ps(y, x2);
	y2 = _mm512_mul_ps(y2, x2);
	y = _mm512_add_ps(y, _ps_coscof_p2);
	y2 = _mm512_add_ps(y2, _ps_sincof_p2);
	y = _mm512_mul_ps(y, x4);
	y2 = _mm512_mul_ps(y2, x3);
	temp = _mm512_mul_ps(x2, _ps_0point5);
	temp = _mm512_sub_ps(temp, _ps_1);
	y = _mm512_sub_ps(y, temp);
	y2 = _mm512_add_ps(y2, x);

	y = _mm512_castsi512_ps(_mm512_andnot_epi32(emm2, _mm512_castps_si512(y)));
	y2 = _mm512_castsi512_ps(_mm512_and_epi32(emm2, _mm512_castps_si512(y2)));

	y = _mm512_add_ps(y, y2);
	y = _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(y), sign_bit));

	return y;
} // sin_ps()
Exemple #3
0
void
test8bit (void)
{
  m512i = _mm512_permutex_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_permutex_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_permutex_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_ternarylogic_epi64 (m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_ternarylogic_epi64 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_ternarylogic_epi64 (mmask8, m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_ternarylogic_epi32 (m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_ternarylogic_epi32 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_ternarylogic_epi32 (mmask16, m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_shuffle_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_shuffle_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_shuffle_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_shuffle_i64x2 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_shuffle_i64x2 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_shuffle_i64x2 (mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_shuffle_i32x4 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_shuffle_i32x4 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_shuffle_i32x4 (mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_shuffle_f64x2 (m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_shuffle_f64x2 (m512d, mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_shuffle_f64x2 (mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512  = _mm512_shuffle_f32x4 (m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_mask_shuffle_f32x4 (m512, mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_maskz_shuffle_f32x4 (mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_permutex_pd (m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_permutex_pd (m512d, mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_permutex_pd (mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_permute_pd (m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_permute_pd (m512d, mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_permute_pd (mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512  = _mm512_permute_ps (m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_mask_permute_ps (m512, mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_maskz_permute_ps (mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_shuffle_pd (m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_shuffle_pd (m512d, mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_shuffle_pd (mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512  = _mm512_shuffle_ps (m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_mask_shuffle_ps (m512, mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512  = _mm512_maskz_shuffle_ps (mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_fixupimm_pd (m512d, m512d, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_fixupimm_pd (m512d, mmask8, m512d, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_fixupimm_pd (mmask8, m512d, m512d, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m512  = _mm512_fixupimm_ps (m512, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512  = _mm512_mask_fixupimm_ps (m512, mmask16, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512  = _mm512_maskz_fixupimm_ps (mmask16, m512, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m128d = _mm_fixupimm_sd (m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m128d = _mm_mask_fixupimm_sd (m128d, mmask8, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m128d = _mm_maskz_fixupimm_sd (mmask8, m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m128  = _mm_fixupimm_ss (m128, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m128  = _mm_mask_fixupimm_ss (m128, mmask8, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m128  = _mm_maskz_fixupimm_ss (mmask8, m128, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m512i = _mm512_rol_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_rol_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_rol_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_ror_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_ror_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_ror_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_rol_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_rol_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_rol_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512i = _mm512_ror_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_ror_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_ror_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m256i = _mm512_cvtps_ph (m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m256i = _mm512_mask_cvtps_ph (m256i, mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m256i = _mm512_maskz_cvtps_ph (mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  m512d = _mm512_roundscale_pd (m512d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512d = _mm512_mask_roundscale_pd (m512d, mmask8, m512d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512d = _mm512_maskz_roundscale_pd (mmask8, m512d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m512  = _mm512_roundscale_ps (m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512  = _mm512_mask_roundscale_ps (m512, mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m512  = _mm512_maskz_roundscale_ps (mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m128d = _mm_roundscale_sd (m128d, m128d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
  m128  = _mm_roundscale_ss (m128, m128, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */

  m512i = _mm512_alignr_epi32 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_alignr_epi32 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_alignr_epi32 (mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_alignr_epi64 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_mask_alignr_epi64 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
  m512i = _mm512_maskz_alignr_epi64 (mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */

  mmask8 = _mm512_cmp_epi64_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_cmp_epi32_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_cmp_epu64_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_cmp_epu32_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_cmp_pd_mask (m512d, m512d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm512_cmp_ps_mask (m512, m512, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm512_mask_cmp_epi64_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_mask_cmp_epi32_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_mask_cmp_epu64_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_mask_cmp_epu32_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */
  mmask8 = _mm512_mask_cmp_pd_mask (2, m512d, m512d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm512_mask_cmp_ps_mask (2, m512, m512, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm_cmp_sd_mask (m128d, m128d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm_cmp_ss_mask (m128, m128, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm_mask_cmp_sd_mask (1, m128d, m128d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */
  mmask8 = _mm_mask_cmp_ss_mask (1, m128, m128, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */

}