/*Transposes, in place, a 4x4 block of 16-bit values held in four MMX vectors.
  t0, t1, t2, t3: The four vectors to transpose.
                  Each one is read in full before any of them is overwritten.
  On return, 16-bit lane j of *ti holds what was lane i of *tj on entry.*/
OD_SIMD_INLINE void od_transpose16x4(__m64 *t0, __m64 *t1,
 __m64 *t2, __m64 *t3) {
  /*Stage 1: interleave the 16-bit lanes of each adjacent pair of vectors.*/
  __m64 lo01 = _mm_unpacklo_pi16(*t0, *t1);
  __m64 hi01 = _mm_unpackhi_pi16(*t0, *t1);
  __m64 lo23 = _mm_unpacklo_pi16(*t2, *t3);
  __m64 hi23 = _mm_unpackhi_pi16(*t2, *t3);
  /*Stage 2: interleave the 32-bit halves of the stage 1 results, which
     gathers one lane from each of the four inputs into every output.*/
  *t0 = _mm_unpacklo_pi32(lo01, lo23);
  *t1 = _mm_unpackhi_pi32(lo01, lo23);
  *t2 = _mm_unpacklo_pi32(hi01, hi23);
  *t3 = _mm_unpackhi_pi32(hi01, hi23);
}
// Returns _mm_unpackhi_pi32(a, b): the upper 32 bits of a land in the lower
// half of the result and the upper 32 bits of b in the upper half.
// NOTE(review): the comment inside the body looks like a directive consumed
// by an output-matching test tool; its text is kept exactly as it was.
__m64 test80(__m64 a, __m64 b) {
  // CHECK: punpckhdq
  return _mm_unpackhi_pi32(a, b);
}
// Returns _mm_unpackhi_pi32(a, b): the upper 32 bits of a land in the lower
// half of the result and the upper 32 bits of b in the upper half.
// NOTE(review): the two comments inside the body look like directives
// consumed by an output-matching test tool; each is kept verbatim on its own
// line.
__m64 test_mm_unpackhi_pi32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_unpackhi_pi32
  // CHECK: call x86_mmx @llvm.x86.mmx.punpckhdq
  return _mm_unpackhi_pi32(a, b);
}