//--------------------------------------------------------------------- OptimisedUtil* OptimisedUtil::_detectImplementation(void) { // // Some speed test results (averaged number of CPU timestamp (RDTSC) per-function call): // // Dagon SkeletonAnimation sample - softwareVertexSkinning: // // Pentium 4 3.0G HT Athlon XP 2500+ Athlon 64 X2 Dual Core 3800+ // // Shared Buffers, General C 763677 462903 473038 // Shared Buffers, Unrolled SSE 210030 *best* 369762 228328 *best* // Shared Buffers, General SSE 286202 352412 *best* 302796 // // Separated Buffers, General C 762640 464840 478740 // Separated Buffers, Unrolled SSE 219222 *best* 287992 *best* 238770 *best* // Separated Buffers, General SSE 290129 341614 307262 // // PosOnly, General C 388663 257350 262831 // PosOnly, Unrolled SSE 139814 *best* 200323 *best* 168995 *best* // PosOnly, General SSE 172693 213704 175447 // // Another my own test scene - softwareVertexSkinning: // // Pentium P4 3.0G HT Athlon XP 2500+ // // Shared Buffers, General C 74527 - // Shared Buffers, Unrolled SSE 22743 *best* - // Shared Buffers, General SSE 28527 - // // // Note that speed test appears unaligned load/store instruction version // loss performance 5%-10% than aligned load/store version, even if both // of them access to aligned data. Thus, we should use aligned load/store // as soon as possible. // // // We are pick up the implementation based on test results above. // #ifdef __DO_PROFILE__ { static OptimisedUtilProfiler msOptimisedUtilProfiler; return &msOptimisedUtilProfiler; } #else // !__DO_PROFILE__ #if __OGRE_HAVE_SSE if (PlatformInformation::getCpuFeatures() & PlatformInformation::CPU_FEATURE_SSE) { return _getOptimisedUtilSSE(); } else #endif // __OGRE_HAVE_SSE { return _getOptimisedUtilGeneral(); } #endif // __DO_PROFILE__ }
/** Fills mOptimisedUtils with the implementations usable on this machine.

    The general implementation is always added first. The SSE
    implementation is appended only when SSE support is compiled in
    (__OGRE_HAVE_SSE) and the CPU reports CPU_FEATURE_SSE.
*/
OptimisedUtilProfiler(void)
{
    // Always available, so always registered.
    mOptimisedUtils.push_back(_getOptimisedUtilGeneral());

#if __OGRE_HAVE_SSE
    const bool cpuHasSSE =
        (PlatformInformation::getCpuFeatures() & PlatformInformation::CPU_FEATURE_SSE) != 0;
    if (cpuHasSSE)
        mOptimisedUtils.push_back(_getOptimisedUtilSSE());
#endif
}
/** Fills mOptimisedUtils with the implementations usable on this machine.

    The general implementation is always added first. The SSE
    implementation is appended only when SSE support is compiled in
    (__OGRE_HAVE_SSE) and the CPU reports CPU_FEATURE_SSE. The VFP and
    NEON branches below are commented out and therefore never registered.
*/
OptimisedUtilProfiler(void)
{
    // Always available, so always registered.
    mOptimisedUtils.push_back(_getOptimisedUtilGeneral());

#if __OGRE_HAVE_SSE
    const bool cpuHasSSE =
        (PlatformInformation::getCpuFeatures() & PlatformInformation::CPU_FEATURE_SSE) != 0;
    if (cpuHasSSE)
        mOptimisedUtils.push_back(_getOptimisedUtilSSE());

    // Disabled branches for other instruction sets, kept for reference:
//#elif __OGRE_HAVE_VFP
//  if (PlatformInformation::getCpuFeatures() & PlatformInformation::CPU_FEATURE_VFP)
//  {
//      mOptimisedUtils.push_back(_getOptimisedUtilVFP());
//  }
//#elif __OGRE_HAVE_NEON
//  if (PlatformInformation::getCpuFeatures() & PlatformInformation::CPU_FEATURE_NEON)
//  {
//      mOptimisedUtils.push_back(_getOptimisedUtilNEON());
//  }
#endif
}