/*
 * Convert `samples` 16-bit half-precision values read from `src` into
 * 32-bit floats written to `dst`, using the F16C `_mm_cvtph_ps` intrinsic.
 *
 * src     - input half-float bit patterns; at least `samples` elements.
 * dst     - output floats; at least `samples` elements.  No particular
 *           alignment is required (unaligned stores are used).
 * samples - number of elements to convert.  Zero or a negative count
 *           converts nothing.
 *
 * Returns `samples` unchanged.
 *
 * The enclosing translation unit must be compiled with F16C enabled,
 * since `_mm_cvtph_ps` is called directly.
 */
static inline long
conv_yHalf_yF (const uint16_t *src, float *dst, long samples)
{
  long n = samples;

  /* Bulk path: four half-floats (64 bits) per iteration. */
  while (n >= 4)
    {
      /* Load the 64 bits through the intrinsic instead of dereferencing
       * `src` as a uint64_t: the latter breaks strict aliasing and assumes
       * an alignment a uint16_t buffer does not guarantee.  The upper
       * 64 bits of the register are zeroed, and _mm_cvtph_ps only reads
       * the low four 16-bit lanes anyway. */
      __m128i in_val = _mm_loadl_epi64 ((const __m128i *) src);

      _mm_storeu_ps (dst, _mm_cvtph_ps (in_val));
      src += 4;
      dst += 4;
      n -= 4;
    }

  /* Tail path: at most three leftovers, one at a time.  `n > 0` (rather
   * than `n != 0`) keeps a negative count from looping without bound. */
  while (n > 0)
    {
      /* Zero-extend the half into lane 0; every other lane is zero. */
      __m128i in_val = _mm_cvtsi32_si128 (*src++);

      /* Only lane 0 of the converted vector is written out. */
      _mm_store_ss (dst++, _mm_cvtph_ps (in_val));
      n -= 1;
    }

  return samples;
}
/*
 * Check _mm_cvtph_ps on four known half-precision inputs.
 *
 * The low four 16-bit lanes of the input vector are filled with the
 * half-float encodings of 1, -2, -1 and 2; the converted vector is then
 * compared against those values with check_union128().  A non-zero
 * result from check_union128() aborts the program.
 */
static void
f16c_test (void)
{
  /* Half-precision bit patterns, in lane order. */
  static const int half_bits[4] = { 0x3c00, 0xc000, 0xbc00, 0x4000 };

  /* Single-precision values the conversion is compared against. */
  float expected[4] = { 1, -2, -1, 2 };

  union128i_w val;
  union128 res;
  int lane;

  /* Only lanes 0..3 are written, matching what the conversion consumes. */
  for (lane = 0; lane < 4; lane++)
    val.a[lane] = half_bits[lane];

  res.x = _mm_cvtph_ps (val.x);

  if (check_union128 (res, expected))
    abort ();
}
/*
 * Out-of-line wrapper around _mm_cvtph_ps.
 *
 * The function carries the "f16c" target attribute itself, so the
 * intrinsic is usable here even when the rest of the translation unit
 * is built without F16C enabled.
 *
 * a - vector holding the half-precision inputs.
 *
 * Returns the vector produced by _mm_cvtph_ps(a).
 */
__m128 __attribute__((__target__("f16c")))
mm_cvtph_ps_wrap(__m128i a)
{
    const __m128 widened = _mm_cvtph_ps(a);

    return widened;
}