/*
 * Round a double to a 32-bit integer.
 *
 * Three build-time variants exist:
 *   - USE_SSE4: round to nearest inside the SSE register, then convert.
 *   - USE_SSE2: convert directly with _mm_cvtsd_si32.
 *   - otherwise: core_floor(val + 0.5), truncated to I32.
 *
 * val is expected to be representable as an I32; nothing here checks the range.
 */
I32 roundInt(F64 val)
{
#ifdef USE_SSE4
    const __m128d scalar  = _mm_set_sd(val);
    const __m128d nearest = _mm_round_sd(scalar, scalar, _MM_FROUND_TO_NEAREST_INT);
    return _mm_cvtsd_si32(nearest);
#elif defined(USE_SSE2)
    return (I32)_mm_cvtsd_si32(_mm_set_sd(val));
#else
    return (I32)core_floor(val + 0.5);
#endif
}
/*
 * Return the smallest 32-bit integer that is not less than val.
 *
 * Build-time variants:
 *   - USE_SSE4: _mm_ceil_sd followed by a conversion.
 *   - USE_SSE2: convert to the nearest integer, then add one if that integer
 *     ended up below val.
 *   - otherwise: core_ceil(val), truncated to I32.
 *
 * val is expected to be representable as an I32; nothing here checks the range.
 */
I32 ceilInt(F64 val)
{
#ifdef USE_SSE4
    __m128d t = _mm_set_sd(val);
    t = _mm_ceil_sd(t, t);
    I32 i = _mm_cvtsd_si32(t);
#elif defined(USE_SSE2)
    /*
     * _mm_cvtsd_si32 rounds (it does not truncate), so biasing the input by
     * +0.5 is not a valid ceiling: an exact integer such as 1.0 would become
     * 1.5 and convert to 2. Convert the unbiased value instead and correct
     * the result when it landed below val.
     */
    const __m128d t = _mm_set_sd(val);
    I32 i = (I32)_mm_cvtsd_si32(t);
    /* Low mask bit is set exactly when (double)i < val. */
    i += (I32)(_mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t, (int)i), t)) & 1);
#else
    I32 i = (I32)core_ceil(val);
#endif
    return i;
}
/*
 * Return the largest 32-bit integer that is not greater than val.
 *
 * Build-time variants:
 *   - USE_SSE4: _mm_floor_sd followed by a conversion.
 *   - USE_SSE2: convert to the nearest integer, then subtract one if that
 *     integer ended up above val.
 *   - otherwise: core_floor(val), truncated to I32.
 *
 * val is expected to be representable as an I32; nothing here checks the range.
 */
I32 floorInt(F64 val)
{
#ifdef USE_SSE4
    __m128d t = _mm_set_sd(val);
    t = _mm_floor_sd(t, t);
    I32 i = _mm_cvtsd_si32(t);
#elif defined(USE_SSE2)
    /*
     * _mm_cvtsd_si32 rounds (it does not truncate), so biasing the input by
     * -0.5 is not a valid floor: an exact integer such as 1.0 would become
     * 0.5 and convert to 0 under round-half-to-even. Convert the unbiased
     * value instead and correct the result when it landed above val.
     */
    const __m128d t = _mm_set_sd(val);
    I32 i = (I32)_mm_cvtsd_si32(t);
    /* Low mask bit is set exactly when val < (double)i. */
    i -= (I32)(_mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t, (int)i))) & 1);
#else
    I32 i = (I32)core_floor(val);
#endif
    return i;
}
/** @brief Converts a floating-point number to the nearest integer.

Which implementation is used depends on the compiler and target: an SSE2
scalar conversion, x87 inline assembly, a platform rounding macro, lrint(),
or a portable add-half-and-truncate fallback.

@param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
result is not defined.
 */
CV_INLINE int
cvRound( double value )
{
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
    return _mm_cvtsd_si32(_mm_set_sd( value ));
#elif defined _MSC_VER && defined _M_IX86
    int rounded;
    /* x87: push value, then store it as an integer and pop. */
    __asm
    {
        fld value;
        fistp rounded;
    }
    return rounded;
#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
        defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
    /* NOTE(review): the macro is defined elsewhere and is presumably
       responsible for returning the result - confirm. */
    TEGRA_ROUND_DBL(value);
#elif defined CV_ICC || defined __GNUC__
# if defined ARM_ROUND_DBL
    /* NOTE(review): same expectation as above - the macro should return. */
    ARM_ROUND_DBL(value);
# else
    return (int)lrint(value);
# endif
#else
    /* it's ok if round does not comply with IEEE754 standard;
       the tests should allow +/-1 difference when the tested functions use round */
    const double half = (value >= 0) ? 0.5 : -0.5;
    return (int)(value + half);
#endif
}
// Codegen test for _mm_cvtsd_si32: the check lines inside the function expect
// the intrinsic to become a call to @llvm.x86.sse2.cvtsd2si in the IR output
// and a cvtsd2si instruction in the assembly output.
// NOTE(review): the DAG/ASM check prefixes are presumably configured by RUN
// lines outside this view - confirm before renaming them.
int test_mm_cvtsd_si32(__m128d A) {
  // DAG-LABEL: test_mm_cvtsd_si32
  // DAG: call i32 @llvm.x86.sse2.cvtsd2si
  //
  // ASM-LABEL: test_mm_cvtsd_si32
  // ASM: cvtsd2si
  return _mm_cvtsd_si32(A);
}
/** @brief Rounds floating-point number to the nearest integer not smaller than the original.

The function computes an integer i such that:
\f[i-1 < \texttt{value} \le i\f]
@param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
result is not defined.
@return the ceiling of value as an int.
 */
CV_INLINE int cvCeil( double value )
{
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
    /* Convert to the nearest integer, then add one when that integer is
       still below value (the compare mask's low bit is 1 in that case). */
    __m128d t = _mm_set_sd( value );
    int i = _mm_cvtsd_si32(t);
    return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t));
#else
    /* Truncate toward zero and correct upwards. The comparison is done in
       double precision on purpose: narrowing the difference to float would
       flush very small positive fractions to zero and lose the correction. */
    int i = (int)value;
    return i + (i < value);
#endif
}
/*
 * Round val to the nearest integral value, returned as a double.
 *
 * Build-time variants:
 *   - USE_SSE4: _mm_round_sd with _MM_FROUND_TO_NEAREST_INT.
 *   - USE_SSE2: round-trip through a 32-bit integer when val fits in one;
 *     larger magnitudes take a slower path so they are not clamped to the
 *     32-bit range.
 *   - otherwise: core_floor(val + 0.5).
 */
F64 round(F64 val)
{
#ifdef USE_SSE4
    __m128d t = _mm_set_sd(val);
    t = _mm_round_sd(t, t, _MM_FROUND_TO_NEAREST_INT);
    _mm_store_sd(&val, t);
#elif defined(USE_SSE2)
    if (val > -2147483648.0 && val < 2147483647.0) {
        /* Fast path: the rounded result is guaranteed to fit in 32 bits. */
        __m128d t = _mm_set_sd(val);
        t = _mm_cvtsi32_sd(t, _mm_cvtsd_si32(t));
        _mm_store_sd(&val, t);
    } else if (val > -4503599627370496.0 && val < 4503599627370496.0) {
        /* 2^31 <= |val| < 2^52: too large for the 32-bit conversion, but
           val + 0.5 is still exactly representable here. */
        val = core_floor(val + 0.5);
    }
    /* Otherwise |val| >= 2^52 (already integral), infinite or NaN:
       return the input unchanged. */
#else
    val = core_floor(val + 0.5);
#endif
    return val;
}
/*
 * Return the smallest integral value that is not less than val, as a double.
 *
 * Build-time variants:
 *   - USE_SSE4: _mm_ceil_sd.
 *   - USE_SSE2: convert to the nearest 32-bit integer and add one if that
 *     integer ended up below val; values that do not fit in 32 bits are
 *     handed to core_ceil.
 *   - otherwise: core_ceil(val).
 */
F64 ceil(F64 val)
{
#ifdef USE_SSE4
    __m128d t = _mm_set_sd(val);
    t = _mm_ceil_sd(t, t);
    _mm_store_sd(&val, t);
#elif defined(USE_SSE2)
    if (val > -2147483648.0 && val < 2147483647.0) {
        /*
         * _mm_cvtsd_si32 rounds (it does not truncate), so biasing the input
         * by +0.5 is not a valid ceiling: 1.0 would become 1.5 and convert
         * to 2. Convert the unbiased value and correct afterwards.
         */
        const __m128d t = _mm_set_sd(val);
        const __m128d nearest = _mm_cvtsi32_sd(t, _mm_cvtsd_si32(t));
        /* Low mask bit is set exactly when nearest < val. */
        const int below = _mm_movemask_pd(_mm_cmplt_sd(nearest, t)) & 1;
        _mm_store_sd(&val, nearest);
        val += (F64)below;
    } else {
        /* Out of 32-bit range (or NaN/Inf): the integer round-trip would
           clamp the value, so use the generic implementation. */
        val = core_ceil(val);
    }
#else
    val = core_ceil(val);
#endif
    return val;
}
/*
 * lrint() stand-in implemented with SSE2: load d into the low lane of an
 * SSE register and convert it to a 32-bit integer with _mm_cvtsd_si32.
 * Note that this version returns int.
 */
static inline int lrint(double d)
{
    const __m128d lane = _mm_load_sd(&d);
    const int converted = _mm_cvtsd_si32(lane);

    return converted;
}