/*
 * Returns the smallest integer that is not less than val, as an I32.
 *
 * The implementation is selected at build time:
 *   - USE_SSE4: rounds up with _mm_ceil_sd, then converts the (already
 *     integral) value with _mm_cvtsd_si32.
 *   - USE_SSE2: truncates toward zero with _mm_cvttsd_si32 and adds one when
 *     the truncated value ended up below val.
 *   - otherwise: defers to core_ceil().
 *
 * NOTE(review): both intrinsic paths go through a 32-bit conversion, so NaN
 * and inputs outside the 32-bit signed range do not produce a meaningful
 * result; confirm callers never pass such values.
 */
I32 ceilInt(F64 val)
{
#ifdef USE_SSE4
    __m128d t = _mm_set_sd(val);
    t = _mm_ceil_sd(t, t);
    I32 i = _mm_cvtsd_si32(t);
#elif defined(USE_SSE2)
    /*
     * Do not emulate ceil with "round(val + 0.5)": _mm_cvtsd_si32 rounds
     * halfway cases to the nearest even integer under the default rounding
     * mode, so odd integral inputs would come out one too high
     * (1.0 -> 1.5 -> 2, -1.0 -> -0.5 -> 0). Truncate and adjust instead.
     */
    __m128d t = _mm_set_sd(val);
    I32 i = (I32)_mm_cvttsd_si32(t);
    /* Truncation rounds positive non-integers down; step up to the ceiling.
     * The second test avoids signed overflow at the top of the range
     * (assumes I32 is 32 bits wide -- TODO confirm). */
    if ((F64)i < val && i != (I32)0x7FFFFFFF) {
        ++i;
    }
#else
    I32 i = (I32)core_ceil(val);
#endif
    return i;
}
/*
 * Returns the smallest integral value that is not less than val, as an F64.
 *
 * The implementation is selected at build time:
 *   - USE_SSE4: rounds up directly with _mm_ceil_sd.
 *   - USE_SSE2: truncates toward zero through a 32-bit integer
 *     (_mm_cvttsd_si32 / _mm_cvtsi32_sd) and adds 1.0 when the truncated
 *     value ended up below val.
 *   - otherwise: defers to core_ceil().
 *
 * NOTE(review): the USE_SSE2 path round-trips through a 32-bit integer, so
 * NaN, infinities and values outside the 32-bit signed range are not handled
 * correctly there, and a result of zero is always +0.0; confirm this is
 * acceptable for callers or route those inputs to core_ceil().
 */
F64 ceil(F64 val)
{
#ifdef USE_SSE4
    __m128d t = _mm_set_sd(val);
    t = _mm_ceil_sd(t, t);
    _mm_store_sd(&val, t);
#elif defined(USE_SSE2)
    /*
     * Do not emulate ceil with "round(val + 0.5)": _mm_cvtsd_si32 rounds
     * halfway cases to the nearest even integer under the default rounding
     * mode, so odd integral inputs would come out one too high
     * (1.0 -> 1.5 -> 2.0, -1.0 -> -0.5 -> 0.0). Truncate and adjust instead.
     */
    __m128d t = _mm_set_sd(val);
    F64 truncated = val;
    t = _mm_cvtsi32_sd(t, _mm_cvttsd_si32(t));
    _mm_store_sd(&truncated, t);
    /* Truncation rounds positive non-integers down; step up to the ceiling. */
    val = (truncated < val) ? truncated + 1.0 : truncated;
#else
    val = core_ceil(val);
#endif
    return val;
}
// Codegen test wrapper: forwards x and y straight to _mm_ceil_sd so the
// compiler output for that intrinsic can be pattern-matched. The FileCheck-style
// directives inside the body expect a call to @llvm.x86.sse41.round.sd in the
// IR and, for the assembly prefix, a roundsd instruction with immediate 2.
__m128d test_mm_ceil_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_ceil_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
  // CHECK-ASM: roundsd $2, %xmm{{.*}}, %xmm{{.*}}
  return _mm_ceil_sd(x, y);
}
// Codegen test wrapper: forwards x and y straight to _mm_ceil_sd so the
// compiler output for that intrinsic can be pattern-matched. The FileCheck-style
// directives inside the body expect a call to @llvm.x86.sse41.round.sd taking
// two <2 x double> operands and the constant i32 2 as its last argument.
__m128d test_mm_ceil_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_ceil_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 2)
  return _mm_ceil_sd(x, y);
}