/* Horizontal maximum: returns the largest of the sixteen signed 8-bit lanes
 * of `a`.
 *
 * The vector is folded onto itself four times (by 8, 4, 2 and 1 bytes). Each
 * fold takes the lane-wise signed maximum of the current value and a copy
 * shifted right by that many bytes, so lane 0 ends up holding the maximum of
 * all sixteen input lanes. The shifted-in zero bytes only ever reach the upper
 * lanes, which are never read back into lane 0.
 */
static inline int8_t _mm_hmax_epi8_rpl(__m128i a)
{
    const __m128i fold8 = _mm_max_epi8(a, _mm_srli_si128(a, 8));
    const __m128i fold4 = _mm_max_epi8(fold8, _mm_srli_si128(fold8, 4));
    const __m128i fold2 = _mm_max_epi8(fold4, _mm_srli_si128(fold4, 2));
    const __m128i fold1 = _mm_max_epi8(fold2, _mm_srli_si128(fold2, 1));

    /* _mm_extract_epi8 yields the byte as an int; narrow it back to a signed
     * 8-bit value for the caller. */
    return (int8_t)_mm_extract_epi8(fold1, 0);
}
/* Processes the first TEST_SIZE words of `buf` in place.
 *
 * For each word, the word and the word shifted right by four bits are placed
 * in the two 64-bit halves of a vector and masked with g_mask (presumably
 * leaving one nibble per byte -- confirm against g_mask's definition). The
 * vector then goes through nine S() passes, followed by a final min/max step
 * whose results are merged as `min | (max << 4)` and written back over the
 * original word.
 *
 * Judging by the name, the net effect is to sort the nibbles of each word;
 * the actual ordering is determined by the g_shuffles tables, which are not
 * visible here.
 */
void nibble_sort_tom(unsigned long *buf)
{
    /* g_shuffles row used by each successive S() pass, in execution order. */
    static const int kPassOrder[] = { 0, 1, 0, 2, 3, 0, 4, 5, 3 };
    enum { kPassCount = sizeof(kPassOrder) / sizeof(kPassOrder[0]) };

    for (int idx = 0; idx < TEST_SIZE; ++idx) {
        const unsigned long word = buf[idx];

        /* Low half: the word itself; high half: the word shifted down a nibble. */
        __m128i lanes = _mm_and_si128(_mm_set_epi64x(word >> 4, word), g_mask);

        for (int pass = 0; pass < kPassCount; ++pass) {
            lanes = S(lanes, kPassOrder[pass]);
        }

        /* The last step skips the placement shuffles that S() performs: the
         * min/max results are already laid out for reassembly into one word. */
        const __m128i lhs = _mm_shuffle_epi8(lanes, g_shuffles[0][0]);
        const __m128i rhs = _mm_shuffle_epi8(lanes, g_shuffles[0][1]);
        const __m128i lo = _mm_min_epi8(lhs, rhs);
        const __m128i hi = _mm_max_epi8(lhs, rhs);

        /* Merge the two results, with the maxima moved up by four bits, and
         * store the low 64 bits back. */
        const __m128i packed = _mm_or_si128(lo, _mm_slli_epi64(hi, 4));
        _mm_storel_epi64((__m128i *)&buf[idx], packed);
    }
}
// Lane-wise signed 8-bit "aValue >= bValue".
//
// Both operands are converted to their X86SIMDValue form, compared lane by
// lane, and the resulting mask is converted back to a SIMDValue.
SIMDValue SIMDInt8x16Operation::OpGreaterThanOrEqual(const SIMDValue& aValue, const SIMDValue& bValue)
{
    const X86SIMDValue lhs = X86SIMDValue::ToX86SIMDValue(aValue);
    const X86SIMDValue rhs = X86SIMDValue::ToX86SIMDValue(bValue);

    // a >= b holds exactly in the lanes where max(a, b) == a.
    const __m128i laneMax = _mm_max_epi8(lhs.m128i_value, rhs.m128i_value);

    X86SIMDValue x86Result;
    x86Result.m128i_value = _mm_cmpeq_epi8(lhs.m128i_value, laneMax);

    return X86SIMDValue::ToSIMDValue(x86Result);
}
/* One min/max pass over `x`, driven by row `i` of g_shuffles.
 *
 * Masks [i][0] and [i][1] gather two byte arrangements of `x`; their
 * lane-wise signed minimum and maximum are then re-shuffled through masks
 * [i][2] and [i][3] respectively and OR-ed together into the result.
 *
 * NOTE(review): this looks like one compare-exchange layer of a sorting
 * network, with the placement masks zeroing complementary lanes so the OR
 * acts as a merge -- confirm against the g_shuffles tables, which are
 * defined elsewhere.
 */
static __m128i S(__m128i x, int i)
{
    const __m128i lhs = _mm_shuffle_epi8(x, g_shuffles[i][0]);
    const __m128i rhs = _mm_shuffle_epi8(x, g_shuffles[i][1]);

    const __m128i placed_min = _mm_shuffle_epi8(_mm_min_epi8(lhs, rhs), g_shuffles[i][2]);
    const __m128i placed_max = _mm_shuffle_epi8(_mm_max_epi8(lhs, rhs), g_shuffles[i][3]);

    return _mm_or_si128(placed_min, placed_max);
}
// Compiler-output test wrapper for _mm_max_epi8: passes x and y straight to the
// intrinsic and returns its result. The directives in the trailing comments
// describe the expected output for this variant: a call to the
// llvm.x86.sse41.pmaxsb intrinsic in the IR and a pmaxsb instruction in the
// assembly. They are presumably consumed by an output-matching tool, so their
// text must not be edited casually.
// NOTE(review): on this single physical line everything after the first "//"
// is comment text, including the return statement -- the original line breaks
// appear to have been lost; confirm against the upstream file.
__m128i test_mm_max_epi8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epi8 // CHECK: call <16 x i8> @llvm.x86.sse41.pmaxsb // CHECK-ASM: pmaxsb %xmm{{.*}}, %xmm{{.*}} return _mm_max_epi8(x, y); }
// Compiler-output test wrapper for _mm_max_epi8: passes x and y straight to the
// intrinsic and returns its result. The directives in the trailing comments
// describe the expected IR for this variant: a signed greater-than compare of
// the two <16 x i8> operands, immediately followed by a select between them on
// that comparison. They are presumably consumed by an output-matching tool, so
// their text must not be edited casually.
// NOTE(review): on this single physical line everything after the first "//"
// is comment text, including the return statement -- the original line breaks
// appear to have been lost; confirm against the upstream file.
__m128i test_mm_max_epi8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] return _mm_max_epi8(x, y); }