/**
 * Apply one 8-tap filter to eight 8-byte groups and store eight 16-bit sums.
 *
 * Each data register carries two 8-byte groups (low and high 64 bits). The
 * low 64 bits read from `filter` are copied into both halves of a register so
 * the same eight coefficients line up with both groups.
 *
 * _mm_maddubs_epi16 treats the data bytes as unsigned and the coefficient
 * bytes as signed, and adds each adjacent pair of products with signed
 * saturation. Two rounds of _mm_hadd_epi16 (wrapping, non-saturating adds)
 * then fold the four pair sums of every group into a single 16-bit value.
 *
 * Result lane order in *dst: data01 low, data01 high, data23 low,
 * data23 high, data45 low, data45 high, data67 low, data67 high.
 *
 * NOTE(review): what "flip" in the name refers to is not visible from this
 * function alone; presumably it describes how the caller arranges the
 * inputs or consumes the output. Confirm against the call sites.
 *
 * \param data01  groups 0 and 1
 * \param data23  groups 2 and 3
 * \param data45  groups 4 and 5
 * \param data67  groups 6 and 7
 * \param filter  source of the coefficients; only the low 64 bits are read
 * \param dst     receives the eight 16-bit sums (unaligned store)
 */
void kvz_eight_tap_filter_x8_and_flip(__m128i data01, __m128i data23, __m128i data45, __m128i data67, __m128i* filter, __m128i* dst)
{
  // Same eight coefficient bytes in the low and the high 64-bit half.
  const __m128i taps = _mm_broadcastq_epi64(_mm_loadl_epi64(filter));

  // Adjacent-pair products: four 16-bit partial sums per group.
  const __m128i pairs01 = _mm_maddubs_epi16(data01, taps);
  const __m128i pairs23 = _mm_maddubs_epi16(data23, taps);
  const __m128i pairs45 = _mm_maddubs_epi16(data45, taps);
  const __m128i pairs67 = _mm_maddubs_epi16(data67, taps);

  // First fold: two partial sums per group, four groups per register.
  const __m128i halves0123 = _mm_hadd_epi16(pairs01, pairs23);
  const __m128i halves4567 = _mm_hadd_epi16(pairs45, pairs67);

  // Second fold: one finished sum per group, all eight in one register.
  _mm_storeu_si128(dst, _mm_hadd_epi16(halves0123, halves4567));
}
/* Run _mm_broadcastq_epi64 on NUM generated inputs and abort as soon as the
   intrinsic's output fails check_union128i_q against the values produced
   by calc_pbroadcastq128.

   NOTE(review): init_pbroadcastq128, calc_pbroadcastq128 and
   check_union128i_q are defined outside this block; presumably they fill
   the input, compute the reference result in plain C, and return nonzero
   on mismatch.  Confirm against their definitions.  */
static void
avx2_test (void)
{
  union128i_q input;
  union128i_q actual;
  long long int expected[2];
  int iter;

  for (iter = 0; iter < NUM; iter++)
    {
      /* Input for this iteration, derived from the iteration index.  */
      init_pbroadcastq128 (input.a, iter);

      /* Value under test, then the reference computed from the same input.  */
      actual.x = _mm_broadcastq_epi64 (input.x);
      calc_pbroadcastq128 (input.a, expected);

      if (check_union128i_q (actual, expected))
        {
          abort ();
        }
    }
}
/*
 * Codegen test for _mm_broadcastq_epi64: the FileCheck directive inside the
 * body expects the emitted IR to reference the AVX2 pbroadcastq.128
 * intrinsic. The directive comment is consumed by the test harness and must
 * stay exactly as written.
 */
__m128i test_mm_broadcastq_epi64(__m128i a) {
  // CHECK: @llvm.x86.avx2.pbroadcastq.128
  __m128i broadcast = _mm_broadcastq_epi64(a);
  return broadcast;
}
/*
 * Codegen test for _mm_broadcastq_epi64: the FileCheck directives inside the
 * body anchor on this function's name, reject any reference to the AVX2
 * pbroadcastq.128 intrinsic, and expect a shufflevector with an all-zero
 * mask instead. The directive comments are consumed by the test harness and
 * must stay exactly as written, in this order.
 */
__m128i test_mm_broadcastq_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_broadcastq_epi64
  // CHECK-NOT: @llvm.x86.avx2.pbroadcastq.128
  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> zeroinitializer
  __m128i broadcast = _mm_broadcastq_epi64(a);
  return broadcast;
}