/**
 * Rounds val up to the nearest integer (ceiling) and returns it as an I32.
 *
 * The implementation is chosen at compile time:
 *   - USE_SSE4: _mm_ceil_ss followed by a float-to-int conversion.
 *   - USE_SSE2: truncating conversion plus a manual adjustment.
 *   - otherwise: core_ceil().
 *
 * @param val  value to round.
 * @return     the ceiling of val. The result is only meaningful when that
 *             ceiling fits in an I32.
 */
I32 ceilInt(F32 val)
{
#ifdef USE_SSE4
    __m128 t = _mm_set_ss(val);
    t = _mm_ceil_ss(t, t);
    /* t is already integral, so the conversion's rounding mode is irrelevant. */
    I32 i = _mm_cvtss_si32(t);
#elif defined(USE_SSE2)
    /*
     * Adding 0.5 and converting with round-to-nearest-even is not a ceiling:
     * 1.0f would become 1.5f and then 2. Truncate toward zero instead, which
     * is already the ceiling for negative inputs, and bump the result only
     * when a non-negative input had a fractional part.
     */
    I32 i = (I32)_mm_cvttss_si32(_mm_set_ss(val));
    /*
     * The i >= 0 guard also leaves the 0x80000000 value that the conversion
     * produces for out-of-range or NaN inputs untouched.
     */
    if (i >= 0 && (F32)i < val) {
        ++i;
    }
#else
    I32 i = (I32)core_ceil(val);
#endif
    return i;
}
/**
 * Rounds val up to the nearest integral value and returns it as an F32.
 *
 * The implementation is chosen at compile time:
 *   - USE_SSE4: _mm_ceil_ss.
 *   - USE_SSE2: truncating conversion plus a manual adjustment.
 *   - otherwise: core_ceil().
 *
 * @param val  value to round.
 * @return     the smallest integral value that is not less than val.
 */
F32 ceil(F32 val)
{
#ifdef USE_SSE4
    __m128 t = _mm_set_ss(val);
    t = _mm_ceil_ss(t, t);
    _mm_store_ss(&val, t);
#elif defined(USE_SSE2)
    /*
     * Floats with magnitude >= 2^23 have no fractional bits, and NaN fails
     * both comparisons, so those inputs are returned unchanged. This also
     * keeps the 32-bit integer round trip below strictly in range.
     */
    if (val > -8388608.0f && val < 8388608.0f) {
        const __m128 t = _mm_set_ss(val);

        /* Truncate toward zero; this is already the ceiling for val <= 0. */
        __m128 r = _mm_cvtsi32_ss(t, _mm_cvttss_si32(t));

        /* Add 1.0f when truncation landed below the input (positive fraction). */
        r = _mm_add_ss(r, _mm_and_ps(_mm_cmplt_ss(r, t), _mm_set_ss(1.0f)));

        /* Carry the input's sign over so inputs in (-1, 0] yield -0.0f. */
        r = _mm_or_ps(r, _mm_and_ps(t, _mm_set_ss(-0.0f)));

        _mm_store_ss(&val, r);
    }
#else
    val = (F32)core_ceil(val);
#endif
    return val;
}
// Codegen test for _mm_ceil_ss: the FileCheck directives inside the body
// expect a call to the SSE4.1 round.ss intrinsic in the IR and a roundss
// instruction with immediate 2 (round toward +infinity) in the assembly.
// Each directive must stay on its own comment line, otherwise the first "//"
// comments out the later directives, the return statement and the brace.
__m128 test_mm_ceil_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_ceil_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
  // CHECK-ASM: roundss $2, %xmm{{.*}}, %xmm{{.*}}
  return _mm_ceil_ss(x, y);
}
// Codegen test for _mm_ceil_ss: the FileCheck directives inside the body
// expect a call to the SSE4.1 round.ss intrinsic whose rounding-control
// operand is i32 2 (round toward +infinity).
// Each directive must stay on its own comment line, otherwise the first "//"
// comments out the later directive, the return statement and the brace.
__m128 test_mm_ceil_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_ceil_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 2)
  return _mm_ceil_ss(x, y);
}