/*
 * Load four consecutive int8_t values from `src` and sign-extend each one
 * into a 32-bit lane of the returned vector (src[0] ends up in lane 0).
 *
 * Exactly four bytes are read. The previous implementation used
 * _mm_loadl_epi64, which reads eight bytes even though only the low four
 * are consumed by either conversion path below, so it could read past the
 * end of a four-element buffer.
 *
 * No alignment beyond that of int8_t is required by this implementation.
 */
inline __m128i load_aligned_int32(const int8_t* src)
{
    /* Assemble the four bytes in little-endian order (x86 byte order) so
     * that src[0] lands in the lowest byte of the vector. Going through
     * unsigned bytes avoids both sign-extension during the shifts and any
     * type-punned access to the caller's buffer. */
    const uint8_t* bytes = (const uint8_t*)src;
    uint32_t packed = (uint32_t)bytes[0]
                    | ((uint32_t)bytes[1] << 8)
                    | ((uint32_t)bytes[2] << 16)
                    | ((uint32_t)bytes[3] << 24);

    /* Only the bit pattern matters here; the upper 96 bits are zeroed. */
    __m128i tmp = _mm_cvtsi32_si128((int)packed);

#if XSIMD_X86_INSTR_SET >= XSIMD_X86_SSE4_1_VERSION
    /* SSE4.1: single-instruction sign extension of the low four bytes. */
    __m128i res = _mm_cvtepi8_epi32(tmp);
#else
    /* SSE2 fallback: widen 8 -> 16 -> 32 bits by interleaving each value
     * with a mask that is all-ones where the value is negative. */
    __m128i mask = _mm_cmplt_epi8(tmp, _mm_set1_epi8(0));
    __m128i tmp1 = _mm_unpacklo_epi8(tmp, mask);
    mask = _mm_cmplt_epi16(tmp1, _mm_set1_epi16(0));
    __m128i res = _mm_unpacklo_epi16(tmp1, mask);
#endif
    return res;
}
/*
 * Check _mm_cvtepi8_epi32 against a scalar reference.
 *
 * For every group of four test values, the four bytes are written to the
 * low 32 bits of one 16-byte source vector, the intrinsic widens them into
 * four 32-bit lanes, and each lane is compared with the byte it came from.
 * abort() is called on the first mismatch.
 *
 * NUM is defined outside this block; the indexing below presumably relies
 * on it being a multiple of 4 -- confirm against its definition.
 */
static void
sse4_1_test (void)
{
  union
    {
      __m128i x[NUM / 4];
      int i[NUM];
      /* Explicitly signed: the intrinsic sign-extends, so the reference
         bytes must be read back as signed values.  Plain `char` has
         implementation-defined signedness and would make the comparison
         fail spuriously where it is unsigned.  */
      signed char c[NUM * 4];
    } dst, src;
  int i, sign = 1;

  /* Fill the low four bytes of each source vector with alternating-sign
     squares; the assignment narrows each value to a byte.  The remaining
     twelve bytes of every vector are never written and do not influence
     the lanes compared below.  */
  for (i = 0; i < NUM; i++)
    {
      src.c[(i % 4) + (i / 4) * 16] = i * i * sign;
      sign = -sign;
    }

  /* Widen one vector (four bytes -> four ints) per iteration.  */
  for (i = 0; i < NUM; i += 4)
    dst.x [i / 4] = _mm_cvtepi8_epi32 (src.x [i / 4]);

  /* Lane i of the result must equal the byte it was widened from.  */
  for (i = 0; i < NUM; i++)
    if (src.c[(i % 4) + (i / 4) * 16] != dst.i[i])
      abort ();
}
// Codegen test wrapper around _mm_cvtepi8_epi32: the function simply forwards
// its argument to the intrinsic. The directive comments inside the body are
// matched against the compiler's output; this variant expects a call to the
// llvm.x86.sse41.pmovsxbd intrinsic in the IR and a pmovsxbd instruction on
// xmm registers in the assembly.
// NOTE(review): a second definition with the same name, carrying different
// expectations, is visible nearby -- presumably from another revision or
// file; confirm only one is compiled per translation unit.
__m128i test_mm_cvtepi8_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> {{.*}})
  // CHECK-ASM: pmovsxbd %xmm{{.*}}, %xmm{{.*}}
  return _mm_cvtepi8_epi32(a);
}
// Codegen test wrapper around _mm_cvtepi8_epi32: the function simply forwards
// its argument to the intrinsic. The directive comments inside the body are
// matched against the compiler's output; this variant expects generic IR
// instead of a target-specific intrinsic call: a shufflevector selecting
// elements 0..3 of the <16 x i8> input, followed by a sext of the resulting
// <4 x i8> to <4 x i32>.
// NOTE(review): a second definition with the same name, carrying different
// expectations, is visible nearby -- presumably from another revision or
// file; confirm only one is compiled per translation unit.
__m128i test_mm_cvtepi8_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi32
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: sext <4 x i8> {{.*}} to <4 x i32>
  return _mm_cvtepi8_epi32(a);
}