inline void mic_sincos_ps(mic_m512_t x, mic_m512_t *s, mic_m512_t *c) { __m512i sign_bit = _mm512_and_epi32(_mm512_castps_si512(x), _pi32_sign_mask); x = _mm512_castsi512_ps(_mm512_and_epi32(_mm512_castps_si512(x), _pi32_inv_sign_mask)); mic_m512_t y = _mm512_mul_ps(x, _ps_cephes_FOPI); __m512i emm2 = _mm512_cvtfxpnt_round_adjustps_epi32(y, _MM_FROUND_TO_NEAREST_INT, _MM_EXPADJ_NONE); emm2 = _mm512_add_epi32(emm2, _pi32_1); emm2 = _mm512_and_epi32(emm2, _pi32_inv1); y = _mm512_cvtfxpnt_round_adjustepu32_ps(emm2, _MM_FROUND_TO_NEAREST_INT, _MM_EXPADJ_NONE); __m512i cos_emm2 = _mm512_sub_epi32(emm2, _pi32_2); __m512i emm0 = _mm512_and_epi32(emm2, _pi32_4); __m512i cos_emm0 = _mm512_andnot_epi32(cos_emm2, _pi32_4); emm0 = _mm512_slli_epi32(emm0, 29); cos_emm0 = _mm512_slli_epi32(cos_emm0, 29); sign_bit = _mm512_xor_epi32(sign_bit, emm0); emm2 = _mm512_and_epi32(emm2, _pi32_2); cos_emm2 = _mm512_and_epi32(cos_emm2, _pi32_2); __mmask16 mask = _mm512_cmp_epi32_mask(emm2, _pi32_0, _MM_CMPINT_EQ); emm2 = _mm512_mask_add_epi32(_pi32_0, mask, _pi32_ffff, _pi32_0); __mmask16 cos_mask = _mm512_cmp_epi32_mask(cos_emm2, _pi32_0, _MM_CMPINT_EQ); cos_emm2 = _mm512_mask_add_epi32(_pi32_0, cos_mask, _pi32_ffff, _pi32_0); x = _mm512_fmadd_ps(y, _ps_minus_cephes_DP123, x); mic_m512_t x2 = _mm512_mul_ps(x, x); mic_m512_t x3 = _mm512_mul_ps(x2, x); mic_m512_t x4 = _mm512_mul_ps(x2, x2); y = _mm512_fmadd_ps(_ps_coscof_p0, x2, _ps_coscof_p1); y = _mm512_fmadd_ps(y, x2, _ps_coscof_p2); mic_m512_t temp_2 = _mm512_fmsub_ps(x2, _ps_0point5, _ps_1); y = _mm512_fmsub_ps(y, x4, temp_2); mic_m512_t y2 = _mm512_fmadd_ps(_ps_sincof_p0, x2, _ps_sincof_p1); y2 = _mm512_fmadd_ps(y2, x2, _ps_sincof_p2); y2 = _mm512_fmadd_ps(y2, x3, x); mic_m512_t cos_y = y; mic_m512_t cos_y2 = y2; y = _mm512_castsi512_ps(_mm512_andnot_epi32(emm2, _mm512_castps_si512(y))); cos_y = _mm512_castsi512_ps(_mm512_andnot_epi32(cos_emm2, _mm512_castps_si512(cos_y))); y2 = _mm512_castsi512_ps(_mm512_and_epi32(emm2, _mm512_castps_si512(y2))); cos_y2 = _mm512_castsi512_ps(_mm512_and_epi32(cos_emm2, _mm512_castps_si512(cos_y2))); y = _mm512_add_ps(y, y2); cos_y = _mm512_add_ps(cos_y, cos_y2); *s = _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(y), sign_bit)); *c = _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(cos_y), cos_emm0)); } // sincos_ps()
// sin() static inline mic_m512_t mic_sin_ps(mic_m512_t x) { __m512i sign_bit; sign_bit = _mm512_and_epi32(_mm512_castps_si512(x), _pi32_sign_mask); x = _mm512_castsi512_ps(_mm512_and_epi32(_mm512_castps_si512(x), _pi32_inv_sign_mask)); mic_m512_t y = _mm512_mul_ps(x, _ps_cephes_FOPI); __m512i emm2 = _mm512_cvtfxpnt_round_adjustps_epi32(y, _MM_FROUND_TO_NEAREST_INT, _MM_EXPADJ_NONE); emm2 = _mm512_add_epi32(emm2, _pi32_1); emm2 = _mm512_and_epi32(emm2, _pi32_inv1); y = _mm512_cvtfxpnt_round_adjustepu32_ps(emm2, _MM_FROUND_TO_NEAREST_INT, _MM_EXPADJ_NONE); __m512i emm0 = _mm512_and_epi32(emm2, _pi32_4); emm0 = _mm512_slli_epi32(emm0, 29); emm2 = _mm512_and_epi32(emm2, _pi32_2); __mmask16 mask = _mm512_cmp_epi32_mask(emm2, _pi32_0, _MM_CMPINT_EQ); emm2 = _mm512_mask_add_epi32(_pi32_0, mask, _pi32_ffff, _pi32_0); sign_bit = _mm512_xor_epi32(sign_bit, emm0); mic_m512_t temp = _ps_minus_cephes_DP123; temp = _mm512_mul_ps(y, temp); x = _mm512_add_ps(x, temp); mic_m512_t x2 = _mm512_mul_ps(x, x); mic_m512_t x3 = _mm512_mul_ps(x2, x); mic_m512_t x4 = _mm512_mul_ps(x2, x2); y = _mm512_mul_ps(_ps_coscof_p0, x2); mic_m512_t y2 = _mm512_mul_ps(_ps_sincof_p0, x2); y = _mm512_add_ps(y, _ps_coscof_p1); y2 = _mm512_add_ps(y2, _ps_sincof_p1); y = _mm512_mul_ps(y, x2); y2 = _mm512_mul_ps(y2, x2); y = _mm512_add_ps(y, _ps_coscof_p2); y2 = _mm512_add_ps(y2, _ps_sincof_p2); y = _mm512_mul_ps(y, x4); y2 = _mm512_mul_ps(y2, x3); temp = _mm512_mul_ps(x2, _ps_0point5); temp = _mm512_sub_ps(temp, _ps_1); y = _mm512_sub_ps(y, temp); y2 = _mm512_add_ps(y2, x); y = _mm512_castsi512_ps(_mm512_andnot_epi32(emm2, _mm512_castps_si512(y))); y2 = _mm512_castsi512_ps(_mm512_and_epi32(emm2, _mm512_castps_si512(y2))); y = _mm512_add_ps(y, y2); y = _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(y), sign_bit)); return y; } // sin_ps()
void test8bit (void) { m512i = _mm512_permutex_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_permutex_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_permutex_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_ternarylogic_epi64 (m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_ternarylogic_epi64 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_ternarylogic_epi64 (mmask8, m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_ternarylogic_epi32 (m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_ternarylogic_epi32 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_ternarylogic_epi32 (mmask16, m512i, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_shuffle_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_shuffle_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_shuffle_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_shuffle_i64x2 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_shuffle_i64x2 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_shuffle_i64x2 (mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_shuffle_i32x4 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_shuffle_i32x4 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_shuffle_i32x4 (mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_shuffle_f64x2 (m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_mask_shuffle_f64x2 (m512d, mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_maskz_shuffle_f64x2 (mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512 = _mm512_shuffle_f32x4 (m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512 = _mm512_mask_shuffle_f32x4 (m512, mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512 = _mm512_maskz_shuffle_f32x4 (mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_permutex_pd (m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_mask_permutex_pd (m512d, mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_maskz_permutex_pd (mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_permute_pd (m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_mask_permute_pd (m512d, mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_maskz_permute_pd (mmask8, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512 = _mm512_permute_ps (m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512 = _mm512_mask_permute_ps (m512, mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512 = _mm512_maskz_permute_ps (mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_shuffle_pd (m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_mask_shuffle_pd (m512d, mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_maskz_shuffle_pd (mmask8, m512d, m512d, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512 = _mm512_shuffle_ps (m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512 = _mm512_mask_shuffle_ps (m512, mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512 = _mm512_maskz_shuffle_ps (mmask16, m512, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_fixupimm_pd (m512d, m512d, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512d = _mm512_mask_fixupimm_pd (m512d, mmask8, m512d, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512d = _mm512_maskz_fixupimm_pd (mmask8, m512d, m512d, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512 = _mm512_fixupimm_ps (m512, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512 = _mm512_mask_fixupimm_ps (m512, mmask16, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512 = _mm512_maskz_fixupimm_ps (mmask16, m512, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m128d = _mm_fixupimm_sd (m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m128d = _mm_mask_fixupimm_sd (m128d, mmask8, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m128d = _mm_maskz_fixupimm_sd (mmask8, m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m128 = _mm_fixupimm_ss (m128, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m128 = _mm_mask_fixupimm_ss (m128, mmask8, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m128 = _mm_maskz_fixupimm_ss (mmask8, m128, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512i = _mm512_rol_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_rol_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_rol_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_ror_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_ror_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_ror_epi32 (mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_rol_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_rol_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_rol_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_ror_epi64 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_ror_epi64 (m512i, mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_ror_epi64 (mmask8, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m256i = _mm512_cvtps_ph (m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m256i = _mm512_mask_cvtps_ph (m256i, mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m256i = _mm512_maskz_cvtps_ph (mmask16, m512, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512d = _mm512_roundscale_pd (m512d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512d = _mm512_mask_roundscale_pd (m512d, mmask8, m512d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512d = _mm512_maskz_roundscale_pd (mmask8, m512d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512 = _mm512_roundscale_ps (m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512 = _mm512_mask_roundscale_ps (m512, mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512 = _mm512_maskz_roundscale_ps (mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m128d = _mm_roundscale_sd (m128d, m128d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m128 = _mm_roundscale_ss (m128, m128, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512i = _mm512_alignr_epi32 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_alignr_epi32 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_alignr_epi32 (mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_alignr_epi64 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_alignr_epi64 (m512i, mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_maskz_alignr_epi64 (mmask8, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ mmask8 = _mm512_cmp_epi64_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */ mmask8 = _mm512_cmp_epi32_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */ mmask8 = _mm512_cmp_epu64_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */ mmask8 = _mm512_cmp_epu32_mask (m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */ mmask8 = _mm512_cmp_pd_mask (m512d, m512d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */ mmask8 = _mm512_cmp_ps_mask (m512, m512, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */ mmask8 = _mm512_mask_cmp_epi64_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */ mmask8 = _mm512_mask_cmp_epi32_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */ mmask8 = _mm512_mask_cmp_epu64_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */ mmask8 = _mm512_mask_cmp_epu32_mask (2, m512i, m512i, 256); /* { dg-error "the last argument must be a 3-bit immediate" } */ mmask8 = _mm512_mask_cmp_pd_mask (2, m512d, m512d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */ mmask8 = _mm512_mask_cmp_ps_mask (2, m512, m512, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */ mmask8 = _mm_cmp_sd_mask (m128d, m128d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */ mmask8 = _mm_cmp_ss_mask (m128, m128, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */ mmask8 = _mm_mask_cmp_sd_mask (1, m128d, m128d, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */ mmask8 = _mm_mask_cmp_ss_mask (1, m128, m128, 256); /* { dg-error "the immediate argument must be a 5-bit immediate" } */ }