static void check_fpu() { uint32_t f_control; if(_controlfp_s(&f_control, 0, 0) == 0) { uint32_t unused; uint32_t rounding_mode = f_control & _MCW_RC; uint32_t precision_mode = f_control & _MCW_PC; if(rounding_mode != _RC_NEAR) { std::cerr << "Floating point rounding mode is currently '" << ((rounding_mode == _RC_CHOP) ? "chop" : (rounding_mode == _RC_UP) ? "up" : (rounding_mode == _RC_DOWN) ? "down" : (rounding_mode == _RC_NEAR) ? "near" : "unknown") << "' setting to 'near'\n"; if(_controlfp_s(&unused, _RC_NEAR, _MCW_RC)) { std::cerr << "failed to set floating point rounding type to 'near'\n"; } } if(precision_mode != _PC_53) { std::cerr << "Floating point precision mode is currently '" << ((precision_mode == _PC_53) ? "double" : (precision_mode == _PC_24) ? "single" : (precision_mode == _PC_64 ) ? "double extended" : "unknown") << "' setting to 'double'\n"; if(_controlfp_s(&unused, _PC_53, _MCW_PC)) { std::cerr << "failed to set floating point precision type to 'double'\n"; } } } else { std::cerr << "_controlfp_s failed.\n"; } }
void os_init() { __pfnDliNotifyHook2 = delayHook; __cpuid(cpuinfo, 1); #define MMXSSE 0x02800000 if ((cpuinfo[3] & MMXSSE) != MMXSSE) { ::MessageBoxA(NULL, "Mumble requires a SSE capable processor (Pentium 3 / Ahtlon-XP)", "Mumble", MB_OK | MB_ICONERROR); exit(0); } unsigned int currentControl = 0; _controlfp_s(¤tControl, _DN_FLUSH, _MCW_DN); mumble_speex_init(); #ifdef QT_NO_DEBUG #ifdef COMPAT_CLIENT errno_t res = 0; size_t reqSize, bSize; _wgetenv_s(&reqSize, NULL, 0, L"APPDATA"); if (reqSize > 0) { reqSize += strlen("/Mumble/Console11x.txt"); bSize = reqSize; STACKVAR(wchar_t, buff, reqSize+1); _wgetenv_s(&reqSize, buff, bSize, L"APPDATA"); wcscat_s(buff, bSize, L"/Mumble/Console11x.txt"); res = _wfopen_s(&fConsole, buff, L"a+"); }
void physx::shdfnd::enableFPExceptions() { // clear any pending exceptions _clearfp(); // enable all fp exceptions except inexact and underflow (common, benign) _controlfp_s(NULL, PxU32(~_MCW_EM) | _EM_INEXACT | _EM_UNDERFLOW, _MCW_EM); }
void initPredicates() { static char a_c=0; double hf, ck, lc; int e_o; if (a_c) return; else a_c = 1; #ifdef SPECIFY_FP_PRECISION unsigned int old_cfp; _controlfp_s(&old_cfp, _PC_53, MCW_PC); #endif e_o = 1; _eps = _spl = ck = 1.0; hf = 0.5; do { lc=ck; _eps *= hf; if (e_o) _spl *= 2.0; e_o = !e_o; ck = 1.0 + _eps; } while ((ck != 1.0) && (ck != lc)); _spl += 1.0; _reb = (3.0 + 8.0 * _eps) * _eps; _ccwebA = (3.0 + 16.0 * _eps) * _eps; _ccwebB = (2.0 + 12.0 * _eps) * _eps; _ccwebC = (9.0 + 64.0 * _eps) * _eps * _eps; _o3ebA = (7.0 + 56.0 * _eps) * _eps; _o3ebB = (3.0 + 28.0 * _eps) * _eps; _o3ebC = (26.0 + 288.0 * _eps) * _eps * _eps; _iccebA = (10.0 + 96.0 * _eps) * _eps; _iccebB = (4.0 + 48.0 * _eps) * _eps; _iccebC = (44.0 + 576.0 * _eps) * _eps * _eps; _ispebA = (16.0 + 224.0 * _eps) * _eps; _ispebB = (5.0 + 72.0 * _eps) * _eps; _ispebC = (71.0 + 1408.0 * _eps) * _eps * _eps; #ifdef SPECIFY_FP_PRECISION _controlfp_s(&old_cfp, _CW_DEFAULT, MCW_PC); #endif }
// Switches the FPU precision field to 24 bits and remembers the control word
// that was active beforehand so Restore() can put it back.
// Does nothing when a control word is already stored.
void MkFixedFPUSetter::FixPrecision(void)
{
	if (m_ControlWord.GetSize() != 0)
	{
		return;
	}

	// Query the control word *before* changing it. _controlfp_s reports the
	// word in effect after the call, so the value returned by the setting call
	// below would already contain _PC_24 and be useless for restoring.
	// The default is used as a fallback should the query fail.
	unsigned int previous = _CW_DEFAULT;
	_controlfp_s(&previous, 0, 0);

	unsigned int updated;
	_controlfp_s(&updated, _PC_24, MCW_PC);

	m_ControlWord.Push(previous);
}

// Restores the precision field saved by FixPrecision().
// Does nothing unless exactly one control word is stored.
void MkFixedFPUSetter::Restore(void)
{
	if (m_ControlWord.GetSize() != 1)
	{
		return;
	}

	unsigned int saved;
	m_ControlWord.Pop(saved);

	// Only the precision field is written back; other fields are left alone.
	unsigned int updated;
	_controlfp_s(&updated, saved, MCW_PC);
}
// Captures the current state of the exception-mask bits selected by
// exception_mask. Only bits inside _MCW_EM may be passed.
Scoped_FPU_exception_control::Scoped_FPU_exception_control(unsigned int exception_mask)
    : m_exception_mask(exception_mask & _MCW_EM)
{
    // Anything outside the exception-mask field is a caller error.
    assert((exception_mask & ~_MCW_EM) == 0);

    // Read (without modifying) the control word.
    const errno_t status = _controlfp_s(&m_original_control, 0, 0);
    PortableRuntime::check_exception(status == 0);

    // Keep only the bits this object is responsible for.
    m_original_control = m_original_control & m_exception_mask;
}
// Saves the current floating point control state into mControlWords and then
// switches to the default control word with denormal flushing (_DN_FLUSH).
physx::shdfnd::FPUGuard::FPUGuard()
{
	// State applied for the lifetime of the guard: defaults plus flushing.
	const unsigned int guardedState = _CW_DEFAULT | _DN_FLUSH;

#if defined(PX_X64) || defined(PX_WINMODERN)
	// Read the control word currently in effect.
	_controlfp_s(mControlWords, 0, 0);

	// Apply the guarded state to every field of the control word.
	unsigned int resulting;
	_controlfp_s(&resulting, guardedState, _MCW_ALL);
#else
	// Read the x87 and SSE control words separately.
	__control87_2(0, 0, mControlWords, mControlWords + 1);

	// Apply the guarded state to both the x87 and the SSE unit.
	unsigned int resultingX87;
	unsigned int resultingSse;
	__control87_2(guardedState, _MCW_ALL, &resultingX87, &resultingSse);
#endif
}
void sigfpe_test() { // Code taken from http://www.devx.com/cplus/Article/34993/1954 //Set the x86 floating-point control word according to what //exceptions you want to trap. _clearfp(); //Always call _clearfp before setting the control //word //Because the second parameter in the following call is 0, it //only returns the floating-point control word unsigned int cw; #if _MSC_VER<1400 cw = _controlfp(0, 0); //Get the default control #else _controlfp_s(&cw, 0, 0); //Get the default control #endif //word //Set the exception masks off for exceptions that you want to //trap. When a mask bit is set, the corresponding floating-point //exception is //blocked from being generating. cw &=~(EM_OVERFLOW|EM_UNDERFLOW|EM_ZERODIVIDE| EM_DENORMAL|EM_INVALID); //For any bit in the second parameter (mask) that is 1, the //corresponding bit in the first parameter is used to update //the control word. unsigned int cwOriginal = 0; #if _MSC_VER<1400 cwOriginal = _controlfp(cw, MCW_EM); //Set it. #else _controlfp_s(&cwOriginal, cw, MCW_EM); //Set it. #endif //MCW_EM is defined in float.h. // Divide by zero float a = 1.0f; float b = 0.0f; float c = a/b; c; //Restore the original value when done: //_controlfp_s(cwOriginal, MCW_EM); }
// Puts the denormal handling back to the state recorded in previous_state.
FlushToZero::~FlushToZero()
{
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Argument order is (result, newControl, mask). The saved state is the new
    // value and _MCW_DN restricts the write to the denormal-control field, so
    // exception masks, rounding and precision are left untouched.
    unsigned int new_state;
    _controlfp_s(&new_state, previous_state, _MCW_DN);
#elif defined(__APPLE__)
    fesetenv(&previous_state);
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
    // Re-apply the saved denormals-are-zero bits instead of unconditionally
    // clearing them, so a setting that was on before the guard stays on.
    _mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK)
               | (previous_state & _MM_DENORMALS_ZERO_MASK));
#endif
}
static void fpesetup(struct sigaction *action) { #if defined(__linux__) || defined(_WIN32) || defined(OSX_SSE_FPE) action->sa_handler = fpehandler; sigaction(SIGFPE, action, NULL); # if defined(__linux__) && defined(__GNUC__) feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); # endif /* defined(__linux__) && defined(__GNUC__) */ # if defined(OSX_SSE_FPE) return; /* causes issues */ /* OSX uses SSE for floating point by default, so here * use SSE instructions to throw floating point exceptions */ _MM_SET_EXCEPTION_MASK(_MM_MASK_MASK & ~(_MM_MASK_OVERFLOW | _MM_MASK_INVALID | _MM_MASK_DIV_ZERO)); # endif /* OSX_SSE_FPE */ # if defined(_WIN32) && defined(_MSC_VER) _controlfp_s(NULL, 0, _MCW_EM); /* enables all fp exceptions */ _controlfp_s(NULL, _EM_DENORMAL | _EM_UNDERFLOW | _EM_INEXACT, _MCW_EM); /* hide the ones we don't care about */ # endif /* _WIN32 && _MSC_VER */ #endif }
void Scoped_FPU_exception_control::disable(unsigned int fpu_exceptions) { #if defined(_MSC_VER) assert(((m_exception_mask | fpu_exceptions) & ~m_exception_mask) == 0); // Setting the bit enables masking of exception. errno_t err = _controlfp_s(nullptr, fpu_exceptions, m_exception_mask & fpu_exceptions); PortableRuntime::check_exception(err == 0); #else #error No platform support for FPU exception control. #endif }
// Records the current denormal handling in previous_state and then switches
// denormal flushing on for the lifetime of the object.
FlushToZero::FlushToZero()
{
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Read the control word first: a call that changes the word reports the
    // value in effect *after* the change, which cannot be used for restoring.
    _controlfp_s(&previous_state, 0, 0);

    // Argument order is (result, newControl, mask): write _DN_FLUSH into the
    // denormal-control field only.
    unsigned int new_state;
    _controlfp_s(&new_state, _DN_FLUSH, _MCW_DN);
#elif defined(__APPLE__)
    fegetenv(&previous_state);
    fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV);
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
    // Remember only the denormals-are-zero bits, then switch them on.
    previous_state = _mm_getcsr() & _MM_DENORMALS_ZERO_MASK;
    _mm_setcsr(_mm_getcsr() | (_MM_DENORMALS_ZERO_ON));
#endif
}
unsigned int Scoped_FPU_exception_control::current_control() const { #if defined(_MSC_VER) unsigned int control; errno_t err = _controlfp_s(&control, 0, 0); PortableRuntime::check_exception(err == 0); return control &= m_exception_mask; #else #error No platform support for FPU exception control. #endif }
// Saves the current floating point control word in mSavedControlWord and then
// switches the precision field to the requested precision. An unrecognised
// precision value triggers an assertion.
AutoFPUPrecision::AutoFPUPrecision(EA::WebKit::FPUPrecision precisionDesired)
{
	// Precision-control bits matching the request.
	unsigned int precisionBits = 0;
	if (precisionDesired == kFPUPrecisionExtended)
	{
		precisionBits = _PC_64;
	}
	else if (precisionDesired == kFPUPrecisionDouble)
	{
		precisionBits = _PC_53;
	}
	else if (precisionDesired == kFPUPrecisionSingle)
	{
		precisionBits = _PC_24;
	}
	else
	{
		EAW_ASSERT_MSG(false,"Invalid FPUPrecision specified");
	}

	// Remember the control word that is active right now.
	_controlfp_s(&mSavedControlWord, 0, 0);

	// Apply the requested precision; only the precision field is written.
	_controlfp_s(NULL, precisionBits, MCW_PC);
}
Scoped_FPU_exception_control::~Scoped_FPU_exception_control() { #if defined(_MSC_VER) // Clear pending FPU exceptions, so enabling won't trigger them. _clearfp(); errno_t err = _controlfp_s(nullptr, m_original_control, m_exception_mask); (err); // Prevent unreferenced parameter in Release build. assert(err == 0); #else #error No platform support for FPU exception control. #endif }
// Application entry point: sets the FPU precision to 53 bits, enables heap
// termination on corruption, initialises COM, creates the display window and
// pumps window messages until WM_QUIT arrives.
// Returns the wParam of the last message (0 when COM initialisation fails).
int __stdcall WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd)
{
  unsigned old_fp_state;
  _controlfp_s(&old_fp_state, _PC_53, _MCW_PC);

  /*Array<int> test1;
  std::vector<int> test2;
  LARGE_INTEGER pc1,pc2;
  QueryPerformanceCounter(&pc1);
  for (int i=0;i<9999999;++i)
    test1.Add(i);
  QueryPerformanceCounter(&pc2);
  auto d1 = pc2.QuadPart - pc1.QuadPart;
  QueryPerformanceCounter(&pc1);
  for (int i=0;i<9999999;++i)
    test2.push_back(i);
  QueryPerformanceCounter(&pc2);
  auto d2 = pc2.QuadPart - pc1.QuadPart;
  char debug_str[256];
  sprintf(debug_str,"speed1:%llu - speed2:%llu\n",d1,d2);
  OutputDebugString(debug_str);
  sprintf(debug_str,"size1:%d - size2:%d\n",test1.size(), test2.capacity());
  OutputDebugString(debug_str);*/

  HeapSetInformation(NULL, HeapEnableTerminationOnCorruption, NULL, 0);

  // Zero-initialised: msg is inspected by the loop condition even when
  // PeekMessage found nothing, and msg.wParam is returned even when
  // CoInitialize fails, so it must never hold indeterminate values.
  MSG msg = {};

  //cdioInit();
  //cdioTest();
  //cdioDeinit();

  if (SUCCEEDED(CoInitialize(NULL))) {
    my_app::DisplayWindow display_window;
    display_window.Initialize();
    do {
      if (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) {
        TranslateMessage(&msg);
        DispatchMessage(&msg);
      } else {
        //display_window.Step();
      }
    } while(msg.message!=WM_QUIT);
    CoUninitialize();
  }

  //Return the exit code to the system.
  return static_cast<int>(msg.wParam);
}
/* set floating point exception stuff * this stuff is from blender project. */ static void _glhckSetupFPE(void) { #if defined(__linux__) || defined(_WIN32) || defined(OSX_SSE_FPE) /* zealous but makes float issues a heck of a lot easier to find! * set breakpoints on fpe_handler */ signal(SIGFPE, _glhckFpeHandler); # if defined(__linux__) && defined(__GNUC__) feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); # endif /* defined(__linux__) && defined(__GNUC__) */ # if defined(OSX_SSE_FPE) return; /* causes issues */ /* OSX uses SSE for floating point by default, so here * use SSE instructions to throw floating point exceptions */ _MM_SET_EXCEPTION_MASK(_MM_MASK_MASK & ~ (_MM_MASK_OVERFLOW | _MM_MASK_INVALID | _MM_MASK_DIV_ZERO)); # endif /* OSX_SSE_FPE */ # if defined(_WIN32) && defined(_MSC_VER) _controlfp_s(NULL, 0, _MCW_EM); /* enables all fp exceptions */ _controlfp_s(NULL, _EM_DENORMAL | _EM_UNDERFLOW | _EM_INEXACT, _MCW_EM); /* hide the ones we don't care about */ # endif /* _WIN32 && _MSC_VER */ #endif }
void Scoped_FPU_exception_control::enable(unsigned int fpu_exceptions) { #if defined(_MSC_VER) assert(((m_exception_mask | fpu_exceptions) & ~m_exception_mask) == 0); // Clear pending FPU exceptions, so enabling won't trigger them. _clearfp(); // Clearing the bit enables exception. errno_t err = _controlfp_s(nullptr, ~fpu_exceptions, m_exception_mask & fpu_exceptions); PortableRuntime::check_exception(err == 0); #else #error No platform support for FPU exception control. #endif }
int gsl_ieee_set_mode (int precision, int rounding, int exception_mask) { unsigned int old, mode = _DN_SAVE, mask = _MCW_DN | _MCW_RC | _MCW_EM; switch(precision) { case GSL_IEEE_SINGLE_PRECISION: mode |= _PC_24; break; case GSL_IEEE_EXTENDED_PRECISION: mode |= _PC_64; break; case GSL_IEEE_DOUBLE_PRECISION: default: mode |= _PC_53; } /* precison control is disabled on Windows x64 with MSVC but is allowed by the Intel compiler */ #if !defined( _WIN64 ) || defined( __ICL ) mask |= _MCW_PC; #endif switch(rounding) { case GSL_IEEE_ROUND_DOWN: mode |= _RC_DOWN; break; case GSL_IEEE_ROUND_UP: mode |= _RC_UP; break; case GSL_IEEE_ROUND_TO_ZERO: mode |= _RC_CHOP; break; case GSL_IEEE_ROUND_TO_NEAREST: default: mode |= _RC_NEAR; } if(exception_mask & GSL_IEEE_MASK_INVALID) mode |= _EM_INVALID; if(exception_mask & GSL_IEEE_MASK_DENORMALIZED) mode |= _EM_DENORMAL; if(exception_mask & GSL_IEEE_MASK_DIVISION_BY_ZERO) mode |= _EM_ZERODIVIDE; if(exception_mask & GSL_IEEE_MASK_OVERFLOW) mode |= _EM_OVERFLOW; if(exception_mask & GSL_IEEE_MASK_UNDERFLOW) mode |= _EM_UNDERFLOW; if(exception_mask & GSL_IEEE_TRAP_INEXACT) mode &= ~_EM_INEXACT; else mode |= _EM_INEXACT; _clearfp(); _controlfp_s(&old, mode, mask); return GSL_SUCCESS; }
// Clears the floating point status and writes the control state stored in
// mControlWords back to the hardware.
physx::shdfnd::FPUGuard::~FPUGuard()
{
	_clearfp();

#if defined(PX_X64) || defined(PX_WINMODERN)
	// A single control word covers the whole FP state here.
	unsigned int resulting;
	_controlfp_s(&resulting, mControlWords[0], _MCW_ALL);
#else
	// x87 and SSE words are written back one unit at a time: a null output
	// pointer leaves the corresponding unit untouched.
	unsigned int resultingX87;
	unsigned int resultingSse;
	__control87_2(mControlWords[0], _MCW_ALL, &resultingX87, 0);
	__control87_2(mControlWords[1], _MCW_ALL, 0, &resultingSse);
#endif
}
// Clears the floating point status and writes the control state stored in
// mControlWords back to the hardware.
nvidia::shdfnd::FPUGuard::~FPUGuard()
{
	_clearfp();

#if NV_X64 || NV_WINRT
	// A single control word covers the whole FP state here.
	unsigned int resulting;
	_controlfp_s(&resulting, mControlWords[0], _MCW_ALL);
#else
	// x87 and SSE words are written back one unit at a time: a null output
	// pointer leaves the corresponding unit untouched.
	unsigned int resultingX87;
	unsigned int resultingSse;
	__control87_2(mControlWords[0], _MCW_ALL, &resultingX87, 0);
	__control87_2(mControlWords[1], _MCW_ALL, 0, &resultingSse);
#endif
}
// Reads the floating point control word and converts it into an FPUFlags
// value (exception masks, denormal flushing, infinity mode, rounding and
// precision).
// Throws std::runtime_error when the control word cannot be read or contains
// an unknown rounding or precision setting.
FPUFlags FPUFlags::getCurrent ()
{
  unsigned int currentControl;
  const unsigned int newControl = 0;
  const unsigned int mask = 0;

  // A mask of zero only queries the control word.
  errno_t result = _controlfp_s (&currentControl, newControl, mask);

  if (result != 0)
    Throw (std::runtime_error ("error in _controlfp_s"));

  FPUFlags flags;

  flags.setMaskNaNs        ((currentControl & _EM_INVALID)    == _EM_INVALID);
  flags.setMaskDenormals   ((currentControl & _EM_DENORMAL)   == _EM_DENORMAL);
  flags.setMaskZeroDivides ((currentControl & _EM_ZERODIVIDE) == _EM_ZERODIVIDE);
  flags.setMaskOverflows   ((currentControl & _EM_OVERFLOW)   == _EM_OVERFLOW);
  flags.setMaskUnderflows  ((currentControl & _EM_UNDERFLOW)  == _EM_UNDERFLOW);
  //flags.setMaskInexacts    ((currentControl & _EM_INEXACT)    == _EM_INEXACT);
  flags.setFlushDenormals  ((currentControl & _DN_FLUSH)      == _DN_FLUSH);
  flags.setInfinitySigned  ((currentControl & _IC_AFFINE)     == _IC_AFFINE);

  Rounding rounding = roundDown;
  switch (currentControl & _MCW_RC)
  {
  case _RC_CHOP: rounding = roundChop; break;
  case _RC_UP:   rounding = roundUp;   break;
  case _RC_DOWN: rounding = roundDown; break;
  case _RC_NEAR: rounding = roundNear; break;
  default:
    Throw (std::runtime_error ("unknown rounding in _controlfp_s"));
  };
  flags.setRounding (rounding);

  Precision precision = bits64;
  switch (currentControl & _MCW_PC )
  {
  case _PC_64: precision = bits64; break;
  case _PC_53: precision = bits53; break;
  case _PC_24: precision = bits24; break;
  default:
    Throw (std::runtime_error ("unknown precision in _controlfp_s"));
  };
  flags.setPrecision (precision);

  return flags;
}
void FPUFlags::setCurrent (const FPUFlags& flags) { unsigned int newControl = 0; unsigned int mask = 0; setControl (flags.getMaskNaNs(), newControl, mask, _EM_INVALID); setControl (flags.getMaskDenormals(), newControl, mask, _EM_DENORMAL); setControl (flags.getMaskZeroDivides(), newControl, mask, _EM_ZERODIVIDE); setControl (flags.getMaskOverflows(), newControl, mask, _EM_OVERFLOW); setControl (flags.getMaskUnderflows(), newControl, mask, _EM_UNDERFLOW); //setControl (flags.getMaskInexacts(), newControl, mask, _EM_INEXACT); setControl (flags.getFlushDenormals(), newControl, mask, _DN_FLUSH); setControl (flags.getInfinitySigned(), newControl, mask, _IC_AFFINE); if (flags.getRounding().is_set ()) { Rounding rounding = flags.getRounding().value (); switch (rounding) { case roundChop: mask |= _MCW_RC; newControl |= _RC_CHOP; break; case roundUp: mask |= _MCW_RC; newControl |= _RC_UP; break; case roundDown: mask |= _MCW_RC; newControl |= _RC_DOWN; break; case roundNear: mask |= _MCW_RC; newControl |= _RC_NEAR; break; } } if (flags.getPrecision().is_set ()) { switch (flags.getPrecision().value ()) { case bits64: mask |= _MCW_PC; newControl |= _PC_64; break; case bits53: mask |= _MCW_PC; newControl |= _PC_53; break; case bits24: mask |= _MCW_PC; newControl |= _PC_24; break; } } unsigned int currentControl; errno_t result = _controlfp_s (¤tControl, newControl, mask); if (result != 0) Throw (std::runtime_error ("error in _controlfp_s")); }
void sFpuControl(int input) { uint mask = _MCW_DN | _MCW_EM; uint flags = _EM_OVERFLOW | _EM_UNDERFLOW; if(input & sFC_DenormalDisable) flags |= _DN_FLUSH; if(!(input & sFC_DenormalException)) flags |= _EM_DENORMAL; if(!(input & sFC_GeneralException)) flags |= _EM_INVALID | _EM_ZERODIVIDE; if(!(input & sFC_InexcactException)) flags |= _EM_INEXACT; switch(flags & sFC_PrecisionMask) { case sFC_DefaultPrecision: break; case sFC_SinglePrecision: mask |= _MCW_PC; flags |= _PC_24; break; case sFC_DoublePrecision: mask |= _MCW_PC; flags |= _PC_53; break; case sFC_ExtendedPrecision: mask |= _MCW_PC; flags |= _PC_64; break; } uint old; _clearfp(); _controlfp_s(&old,flags,mask); }
void os_init() { __pfnDliNotifyHook2 = delayHook; __cpuid(cpuinfo, 1); #define MMXSSE 0x02800000 if ((cpuinfo[3] & MMXSSE) != MMXSSE) { ::MessageBoxA(NULL, "Mumble requires a SSE capable processor (Pentium 3 / Ahtlon-XP)", "Mumble", MB_OK | MB_ICONERROR); exit(0); } OSVERSIONINFOEXW ovi; memset(&ovi, 0, sizeof(ovi)); ovi.dwOSVersionInfoSize = sizeof(ovi); GetVersionEx(reinterpret_cast<OSVERSIONINFOW *>(&ovi)); bIsWin7 = (ovi.dwMajorVersion >= 7) || ((ovi.dwMajorVersion == 6) &&(ovi.dwBuildNumber >= 7100)); bIsVistaSP1 = (ovi.dwMajorVersion >= 7) || ((ovi.dwMajorVersion == 6) &&(ovi.dwBuildNumber >= 6001)); unsigned int currentControl = 0; _controlfp_s(¤tControl, _DN_FLUSH, _MCW_DN); SetHeapOptions(); mumble_speex_init(); #ifdef QT_NO_DEBUG QString console = g.qdBasePath.filePath(QLatin1String("Console.txt")); fConsole = _wfsopen(console.toStdWString().c_str(), L"a+", _SH_DENYWR); if (fConsole) qInstallMsgHandler(mumbleMessageOutput); QString hash; QFile f(qApp->applicationFilePath()); if (! f.open(QIODevice::ReadOnly)) { qWarning("VersionCheck: Failed to open binary"); } else { QByteArray a = f.readAll(); if (a.size() > 0) { QCryptographicHash qch(QCryptographicHash::Sha1); qch.addData(a); hash = QLatin1String(qch.result().toHex()); } } QString comment = QString::fromLatin1("%1\n%2\n%3").arg(QString::fromLatin1(MUMBLE_RELEASE), QString::fromLatin1(MUMTEXT(MUMBLE_VERSION_STRING)), hash); wcscpy_s(wcComment, PATH_MAX, comment.toStdWString().c_str()); musComment.Type = CommentStreamW; musComment.Buffer = wcComment; musComment.BufferSize = wcslen(wcComment) * sizeof(wchar_t); QString dump = g.qdBasePath.filePath(QLatin1String("mumble.dmp")); QFileInfo fi(dump); QDir::root().mkpath(fi.absolutePath()); if (wcscpy_s(wcCrashDumpPath, PATH_MAX, dump.toStdWString().c_str()) == 0) SetUnhandledExceptionFilter(MumbleUnhandledExceptionFilter); // Increase our priority class to live alongside games. 
if (!SetPriorityClass(GetCurrentProcess(),HIGH_PRIORITY_CLASS)) qWarning("Application: Failed to set priority!"); #endif g.qdBasePath.mkpath(QLatin1String("Snapshots")); if (bIsWin7) SetCurrentProcessExplicitAppUserModelID(L"net.sourceforge.mumble.Mumble"); }
bool TestHaarCascadeApplication::process() { #if defined(__APPLE) return true; #endif NCVStatus ncvStat; bool rcode = false; Ncv32u numStages, numNodes, numFeatures; ncvStat = ncvHaarGetClassifierSize(this->cascadeName, numStages, numNodes, numFeatures); ncvAssertReturn(ncvStat == NCV_SUCCESS, false); NCVVectorAlloc<HaarStage64> h_HaarStages(*this->allocatorCPU.get(), numStages); ncvAssertReturn(h_HaarStages.isMemAllocated(), false); NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes(*this->allocatorCPU.get(), numNodes); ncvAssertReturn(h_HaarNodes.isMemAllocated(), false); NCVVectorAlloc<HaarFeature64> h_HaarFeatures(*this->allocatorCPU.get(), numFeatures); ncvAssertReturn(h_HaarFeatures.isMemAllocated(), false); NCVVectorAlloc<HaarStage64> d_HaarStages(*this->allocatorGPU.get(), numStages); ncvAssertReturn(d_HaarStages.isMemAllocated(), false); NCVVectorAlloc<HaarClassifierNode128> d_HaarNodes(*this->allocatorGPU.get(), numNodes); ncvAssertReturn(d_HaarNodes.isMemAllocated(), false); NCVVectorAlloc<HaarFeature64> d_HaarFeatures(*this->allocatorGPU.get(), numFeatures); ncvAssertReturn(d_HaarFeatures.isMemAllocated(), false); HaarClassifierCascadeDescriptor haar; haar.ClassifierSize.width = haar.ClassifierSize.height = 1; haar.bNeedsTiltedII = false; haar.NumClassifierRootNodes = numNodes; haar.NumClassifierTotalNodes = numNodes; haar.NumFeatures = numFeatures; haar.NumStages = numStages; NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting()); NCV_SKIP_COND_BEGIN ncvStat = ncvHaarLoadFromFile_host(this->cascadeName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures); ncvAssertReturn(ncvStat == NCV_SUCCESS, false); ncvAssertReturn(NCV_SUCCESS == h_HaarStages.copySolid(d_HaarStages, 0), false); ncvAssertReturn(NCV_SUCCESS == h_HaarNodes.copySolid(d_HaarNodes, 0), false); ncvAssertReturn(NCV_SUCCESS == h_HaarFeatures.copySolid(d_HaarFeatures, 0), false); ncvAssertCUDAReturn(cudaStreamSynchronize(0), false); NCV_SKIP_COND_END NcvSize32s srcRoi, srcIIRoi, searchRoi; 
srcRoi.width = this->width; srcRoi.height = this->height; srcIIRoi.width = srcRoi.width + 1; srcIIRoi.height = srcRoi.height + 1; searchRoi.width = srcIIRoi.width - haar.ClassifierSize.width; searchRoi.height = srcIIRoi.height - haar.ClassifierSize.height; if (searchRoi.width <= 0 || searchRoi.height <= 0) { return false; } NcvSize32u searchRoiU(searchRoi.width, searchRoi.height); NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height); ncvAssertReturn(d_img.isMemAllocated(), false); NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height); ncvAssertReturn(h_img.isMemAllocated(), false); Ncv32u integralWidth = this->width + 1; Ncv32u integralHeight = this->height + 1; NCVMatrixAlloc<Ncv32u> d_integralImage(*this->allocatorGPU.get(), integralWidth, integralHeight); ncvAssertReturn(d_integralImage.isMemAllocated(), false); NCVMatrixAlloc<Ncv64u> d_sqIntegralImage(*this->allocatorGPU.get(), integralWidth, integralHeight); ncvAssertReturn(d_sqIntegralImage.isMemAllocated(), false); NCVMatrixAlloc<Ncv32u> h_integralImage(*this->allocatorCPU.get(), integralWidth, integralHeight); ncvAssertReturn(h_integralImage.isMemAllocated(), false); NCVMatrixAlloc<Ncv64u> h_sqIntegralImage(*this->allocatorCPU.get(), integralWidth, integralHeight); ncvAssertReturn(h_sqIntegralImage.isMemAllocated(), false); NCVMatrixAlloc<Ncv32f> d_rectStdDev(*this->allocatorGPU.get(), this->width, this->height); ncvAssertReturn(d_rectStdDev.isMemAllocated(), false); NCVMatrixAlloc<Ncv32u> d_pixelMask(*this->allocatorGPU.get(), this->width, this->height); ncvAssertReturn(d_pixelMask.isMemAllocated(), false); NCVMatrixAlloc<Ncv32f> h_rectStdDev(*this->allocatorCPU.get(), this->width, this->height); ncvAssertReturn(h_rectStdDev.isMemAllocated(), false); NCVMatrixAlloc<Ncv32u> h_pixelMask(*this->allocatorCPU.get(), this->width, this->height); ncvAssertReturn(h_pixelMask.isMemAllocated(), false); NCVVectorAlloc<NcvRect32u> 
d_hypotheses(*this->allocatorGPU.get(), this->width * this->height); ncvAssertReturn(d_hypotheses.isMemAllocated(), false); NCVVectorAlloc<NcvRect32u> h_hypotheses(*this->allocatorCPU.get(), this->width * this->height); ncvAssertReturn(h_hypotheses.isMemAllocated(), false); NCVStatus nppStat; Ncv32u szTmpBufIntegral, szTmpBufSqIntegral; nppStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &szTmpBufIntegral, this->devProp); ncvAssertReturn(nppStat == NPPST_SUCCESS, false); nppStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &szTmpBufSqIntegral, this->devProp); ncvAssertReturn(nppStat == NPPST_SUCCESS, false); NCVVectorAlloc<Ncv8u> d_tmpIIbuf(*this->allocatorGPU.get(), std::max(szTmpBufIntegral, szTmpBufSqIntegral)); ncvAssertReturn(d_tmpIIbuf.isMemAllocated(), false); Ncv32u detectionsOnThisScale_d = 0; Ncv32u detectionsOnThisScale_h = 0; NCV_SKIP_COND_BEGIN ncvAssertReturn(this->src.fill(h_img), false); ncvStat = h_img.copySolid(d_img, 0); ncvAssertReturn(ncvStat == NCV_SUCCESS, false); ncvAssertCUDAReturn(cudaStreamSynchronize(0), false); nppStat = nppiStIntegral_8u32u_C1R(d_img.ptr(), d_img.pitch(), d_integralImage.ptr(), d_integralImage.pitch(), NcvSize32u(d_img.width(), d_img.height()), d_tmpIIbuf.ptr(), szTmpBufIntegral, this->devProp); ncvAssertReturn(nppStat == NPPST_SUCCESS, false); nppStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(), d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(), NcvSize32u(d_img.width(), d_img.height()), d_tmpIIbuf.ptr(), szTmpBufSqIntegral, this->devProp); ncvAssertReturn(nppStat == NPPST_SUCCESS, false); const NcvRect32u rect( HAAR_STDDEV_BORDER, HAAR_STDDEV_BORDER, haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER, haar.ClassifierSize.height - 2*HAAR_STDDEV_BORDER); nppStat = nppiStRectStdDev_32f_C1R( d_integralImage.ptr(), d_integralImage.pitch(), d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(), d_rectStdDev.ptr(), d_rectStdDev.pitch(), NcvSize32u(searchRoi.width, 
searchRoi.height), rect, 1.0f, true); ncvAssertReturn(nppStat == NPPST_SUCCESS, false); ncvStat = d_integralImage.copySolid(h_integralImage, 0); ncvAssertReturn(ncvStat == NCV_SUCCESS, false); ncvStat = d_rectStdDev.copySolid(h_rectStdDev, 0); ncvAssertReturn(ncvStat == NCV_SUCCESS, false); for (Ncv32u i=0; i<searchRoiU.height; i++) { for (Ncv32u j=0; j<h_pixelMask.stride(); j++) { if (j<searchRoiU.width) { h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = (i << 16) | j; } else { h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = OBJDET_MASK_ELEMENT_INVALID_32U; } } } ncvAssertReturn(cudaSuccess == cudaStreamSynchronize(0), false); #if !defined(__APPLE__) #if defined(__GNUC__) //http://www.christian-seiler.de/projekte/fpmath/ fpu_control_t fpu_oldcw, fpu_cw; _FPU_GETCW(fpu_oldcw); // store old cw fpu_cw = (fpu_oldcw & ~_FPU_EXTENDED & ~_FPU_DOUBLE & ~_FPU_SINGLE) | _FPU_SINGLE; _FPU_SETCW(fpu_cw); // calculations here ncvStat = ncvApplyHaarClassifierCascade_host( h_integralImage, h_rectStdDev, h_pixelMask, detectionsOnThisScale_h, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures, false, searchRoiU, 1, 1.0f); ncvAssertReturn(ncvStat == NCV_SUCCESS, false); _FPU_SETCW(fpu_oldcw); // restore old cw #else #ifndef _WIN64 Ncv32u fpu_oldcw, fpu_cw; _controlfp_s(&fpu_cw, 0, 0); fpu_oldcw = fpu_cw; _controlfp_s(&fpu_cw, _PC_24, _MCW_PC); #endif ncvStat = ncvApplyHaarClassifierCascade_host( h_integralImage, h_rectStdDev, h_pixelMask, detectionsOnThisScale_h, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures, false, searchRoiU, 1, 1.0f); ncvAssertReturn(ncvStat == NCV_SUCCESS, false); #ifndef _WIN64 _controlfp_s(&fpu_cw, fpu_oldcw, _MCW_PC); #endif #endif #endif NCV_SKIP_COND_END int devId; ncvAssertCUDAReturn(cudaGetDevice(&devId), false); cudaDeviceProp _devProp; ncvAssertCUDAReturn(cudaGetDeviceProperties(&_devProp, devId), false); ncvStat = ncvApplyHaarClassifierCascade_device( d_integralImage, d_rectStdDev, d_pixelMask, detectionsOnThisScale_d, haar, h_HaarStages, d_HaarStages, 
d_HaarNodes, d_HaarFeatures, false, searchRoiU, 1, 1.0f, *this->allocatorGPU.get(), *this->allocatorCPU.get(), _devProp, 0); ncvAssertReturn(ncvStat == NCV_SUCCESS, false); NCVMatrixAlloc<Ncv32u> h_pixelMask_d(*this->allocatorCPU.get(), this->width, this->height); ncvAssertReturn(h_pixelMask_d.isMemAllocated(), false); //bit-to-bit check bool bLoopVirgin = true; NCV_SKIP_COND_BEGIN ncvStat = d_pixelMask.copySolid(h_pixelMask_d, 0); ncvAssertReturn(ncvStat == NCV_SUCCESS, false); if (detectionsOnThisScale_d != detectionsOnThisScale_h) { bLoopVirgin = false; } else { std::sort(h_pixelMask_d.ptr(), h_pixelMask_d.ptr() + detectionsOnThisScale_d); for (Ncv32u i=0; i<detectionsOnThisScale_d && bLoopVirgin; i++) { if (h_pixelMask.ptr()[i] != h_pixelMask_d.ptr()[i]) { bLoopVirgin = false; } } } NCV_SKIP_COND_END if (bLoopVirgin) { rcode = true; } return rcode; }
void os_init() { __cpuid(cpuinfo, 1); #define MMXSSE 0x02800000 if ((cpuinfo[3] & MMXSSE) != MMXSSE) { ::MessageBoxA(NULL, "Mumble requires a SSE capable processor (Pentium 3 / Ahtlon-XP)", "Mumble", MB_OK | MB_ICONERROR); exit(0); } OSVERSIONINFOEXW ovi; memset(&ovi, 0, sizeof(ovi)); ovi.dwOSVersionInfoSize = sizeof(ovi); GetVersionEx(reinterpret_cast<OSVERSIONINFOW *>(&ovi)); bIsWin7 = (ovi.dwMajorVersion >= 7) || ((ovi.dwMajorVersion == 6) &&(ovi.dwBuildNumber >= 7100)); bIsVistaSP1 = (ovi.dwMajorVersion >= 7) || ((ovi.dwMajorVersion == 6) &&(ovi.dwBuildNumber >= 6001)); #if _MSC_VER == 1800 && defined(_M_X64) // Disable MSVC 2013's FMA-optimized math routines on Windows // versions earlier than Windows 8 (6.2). // There are various issues on OSes that do not support the newer // instructions. // See issue mumble-voip/mumble#1615. if (ovi.dwMajorVersion < 5 || (ovi.dwMajorVersion == 6 && ovi.dwMinorVersion <= 1)) { _set_FMA3_enable(0); } #endif unsigned int currentControl = 0; _controlfp_s(¤tControl, _DN_FLUSH, _MCW_DN); SetHeapOptions(); enableCrashOnCrashes(); mumble_speex_init(); #ifdef QT_NO_DEBUG QString console = g.qdBasePath.filePath(QLatin1String("Console.txt")); fConsole = _wfsopen(console.toStdWString().c_str(), L"a+", _SH_DENYWR); if (fConsole) { #if QT_VERSION >= 0x050000 qInstallMessageHandler(mumbleMessageOutputWithContext); #else qInstallMsgHandler(mumbleMessageOutput); #endif } QString hash; QFile f(qApp->applicationFilePath()); if (! 
f.open(QIODevice::ReadOnly)) { qWarning("VersionCheck: Failed to open binary"); } else { QByteArray a = f.readAll(); if (a.size() > 0) { QCryptographicHash qch(QCryptographicHash::Sha1); qch.addData(a); hash = QLatin1String(qch.result().toHex()); } } QString comment = QString::fromLatin1("%1\n%2\n%3").arg(QString::fromLatin1(MUMBLE_RELEASE), QString::fromLatin1(MUMTEXT(MUMBLE_VERSION_STRING)), hash); wcscpy_s(wcComment, PATH_MAX, comment.toStdWString().c_str()); musComment.Type = CommentStreamW; musComment.Buffer = wcComment; musComment.BufferSize = static_cast<ULONG>(wcslen(wcComment) * sizeof(wchar_t)); QString dump = g.qdBasePath.filePath(QLatin1String("mumble.dmp")); QFileInfo fi(dump); QDir::root().mkpath(fi.absolutePath()); if (wcscpy_s(wcCrashDumpPath, PATH_MAX, dump.toStdWString().c_str()) == 0) SetUnhandledExceptionFilter(MumbleUnhandledExceptionFilter); #endif g.qdBasePath.mkpath(QLatin1String("Snapshots")); if (bIsWin7) SetCurrentProcessExplicitAppUserModelID(L"net.sourceforge.mumble.Mumble"); }
// Returns the set of floating point exceptions that are currently trapped,
// i.e. the bits of fp_exception_all whose mask bit is cleared in the control
// word.
unsigned int fpe_get_trapped()
{
    unsigned int current_word = 0;

    // A mask of zero only reads the control word.
    _controlfp_s( &current_word, 0, 0 );

    return ( ~current_word ) & fp_exception_all;
}
// Stops the given floating point exceptions from trapping by setting their
// mask bits. Exceptions not named in `except` keep their current setting.
// except: combination of exception-mask bits (bits outside _MCW_EM are ignored).
// Returns 0 on success and -1 when the control word could not be changed.
int fpe_disable_trap( unsigned int except )
{
    unsigned int curr;

    // Restrict the write to the requested bits. Using the whole _MCW_EM field
    // as mask would clear the mask bit of every exception that is not listed
    // and thereby enable traps the caller never asked for.
    errno_t err = _controlfp_s( &curr, except, except & _MCW_EM );

    return err == 0 ? 0 : -1;
}