// Constructs the hardware-float manager and, on 32-bit Windows builds that do
// not define USE_INTRINSICS, sets the FPU precision control to 53 significand
// bits (double precision).
//
// Platform notes:
//  - Windows x64 / IA64: precision control is not supported there, so nothing
//    is done. See http://msdn.microsoft.com/en-us/library/e9b52ceh(VS.110).aspx
//    CMW: this is believed to be fine because the compiler picks the right
//    instructions (the x64/SSE2 FPU has separate instructions per precision).
//  - OSX / Linux: nothing is done.
//
// The precision is configured only here, in the constructor. At the time of
// writing no other code overwrites it, and Windows handles context switches.
// Open questions left by the original author (CMW): behaviour on CPUs with
// hyper-threading (shared FPU), whether Linux/OSX preserve FPU state across
// context switches, and how rounding modes are affected. As long as the SSE2
// FPU is used, precision itself should not be a problem.
hwf_manager::hwf_manager()
    : m_mpz_manager(m_mpq_manager)
{
#if defined(_WINDOWS) && !defined(_AMD64_) && !defined(_M_IA64) && !defined(USE_INTRINSICS)
    // Strictly only the x87 unit needs this, but applying it is harmless.
    // _PC_53 selects double precision; _PC_64 would select extended precision.
    __control87_2(_PC_53, _MCW_PC, &x86_state, &sse2_state);
#endif
}
// Leaves the guarded scope: calls _clearfp() and then writes the control
// word(s) held in mControlWords back to the floating-point unit(s).
physx::shdfnd::FPUGuard::~FPUGuard()
{
	_clearfp();

#if defined(PX_X64) || defined(PX_WINMODERN)
	// One control word on these targets: write back mControlWords[0].
	unsigned int resultingWord;
	_controlfp_s(&resultingWord, mControlWords[0], _MCW_ALL);
#else
	// x87 and SSE are handled separately: slot 0 goes to the x87 unit,
	// slot 1 to the SSE unit. The null out-pointer skips the other unit.
	unsigned int x87Word, sseWord;
	__control87_2(mControlWords[0], _MCW_ALL, &x87Word, 0);
	__control87_2(mControlWords[1], _MCW_ALL, 0, &sseWord);
#endif
}
// Leaves the guarded scope: calls _clearfp() and then writes the control
// word(s) held in mControlWords back to the floating-point unit(s).
// NOTE(review): mControlWords is presumably filled in by the constructor,
// which is not visible here; confirm.
nvidia::shdfnd::FPUGuard::~FPUGuard()
{
	_clearfp();

#if NV_X64 || NV_WINRT
	// One control word on these targets: write back mControlWords[0].
	unsigned int resultingWord;
	_controlfp_s(&resultingWord, mControlWords[0], _MCW_ALL);
#else
	// x87 and SSE are handled separately: slot 0 goes to the x87 unit,
	// slot 1 to the SSE unit. The null out-pointer skips the other unit.
	unsigned int x87Word, sseWord;
	__control87_2(mControlWords[0], _MCW_ALL, &x87Word, 0);
	__control87_2(mControlWords[1], _MCW_ALL, 0, &sseWord);
#endif
}
// Enters the guarded scope: stores the current floating-point control word(s)
// in mControlWords, then switches to _CW_DEFAULT with _DN_FLUSH added
// (described by the original author as "default plus FTZ and DAZ").
physx::shdfnd::FPUGuard::FPUGuard()
{
#if defined(PX_X64) || defined(PX_WINMODERN)
	// Zero value and zero mask: read the current control word into slot 0.
	_controlfp_s(&mControlWords[0], 0, 0);

	// Apply the default control word with denormal flushing enabled.
	unsigned int appliedWord;
	_controlfp_s(&appliedWord, _CW_DEFAULT | _DN_FLUSH, _MCW_ALL);
#else
	// Zero value and zero mask: read the x87 word into slot 0 and the SSE
	// word into slot 1.
	__control87_2(0, 0, &mControlWords[0], &mControlWords[1]);

	// Apply the default control word with denormal flushing to both units.
	unsigned int x87Word, sseWord;
	__control87_2(_CW_DEFAULT | _DN_FLUSH, _MCW_ALL, &x87Word, &sseWord);
#endif
}
int main( int argc , char* argv[] ) { bool resample = true; #if 0 unsigned int control_word_x87 , control_word_sse2; __control87_2( _DN_FLUSH , _MCW_DN , &control_word_x87 , &control_word_sse2 ); #endif cmdLineParse( argc-1 , &argv[1] , sizeof(params) / sizeof(cmdLineReadable*) , params , 0 ); char valueString[1024]; for( int i=0 ; i<sizeof(params) / sizeof(cmdLineReadable*) ; i++ ) if( params[i]->set ) { params[i]->writeValue( valueString ); printf( "\t--%s %s\n" , params[i]->name , valueString ); } if( !In.set || !Out.set ) { ShowUsage(argv[0]); return EXIT_FAILURE; } char tmpDir[1024]; if( TempDir.set) sprintf( tmpDir , "TMP=%s" , TempDir.value ); else sprintf( tmpDir , "TMP=" ); _putenv( tmpDir ); int w , h; GetReadSize< float >( In.value , w , h ); int height = 1; while( 2*height<w || height<h ) height <<= 1; if( 2*height==w && height==h ) resample = false; if( resample ) printf( "%d x %d -> %d x %d\n" , w , h , 2*height , height ); IWeight.value = -IWeight.value; IWeight.value *= height / 2; IWeight.value *= (height*2) / 2; IWeight.value /= 4. * M_PI; double t = Time( ); if( HighPrecision.set ) if( HDRLabels.set ) Execute< double , double , __int16 >( height , resample ); else Execute< double , double , unsigned char >( height , resample ); else if( HDRLabels.set ) Execute< float , half , __int16 >( height , resample ); else Execute< float , half , unsigned char >( height , resample ); size_t current,peak; WorkingSetInfo( current , peak ); printf( "Running Time: %f\n" , Time()-t ); printf( "Peak working set: %d MB\n" , peak>>20 ); return EXIT_SUCCESS; }