// TestFunction_Speed //------------------------------------------------------------------------------ void TestProjectGeneration::TestFunction_Speed() const { VSProjectGenerator pg; AStackString<> baseDir( "C:\\Windows\\System32" ); Array< AString > baseDirs; baseDirs.Append( baseDir ); // project name pg.SetProjectName( AStackString<>( "Big" ) ); pg.SetBasePaths( baseDirs ); // platforms Array< VSProjectConfig > configs; VSProjectConfig cfg; cfg.m_Platform = "Win32"; cfg.m_Config = "Debug"; configs.Append( cfg ); // files (about 5,000) Array< AString > files; FileIO::GetFiles( baseDir, AStackString<>( "*.mui" ), true, &files ); FileIO::GetFiles( baseDir, AStackString<>( "*.exe" ), true, &files ); FileIO::GetFiles( baseDir, AStackString<>( "*.dll" ), true, &files ); pg.AddFiles( files ); Array< VSProjectFileType > fileTypes; { VSProjectFileType ft; ft.m_FileType = "CppForm"; ft.m_Pattern = "Code\\Forms\\*.h"; fileTypes.Append( ft ); ft.m_FileType = "CppControl"; ft.m_Pattern = "Controls\\*.h"; fileTypes.Append( ft ); } AStackString<> projectFileName( "C:\\Windows\\System\\dummy.vcxproj" ); { Timer t; pg.GenerateVCXProj( projectFileName, configs, fileTypes ); float time = t.GetElapsed(); OUTPUT( "Gen vcxproj : %2.3fs\n", time ); } { Timer t; pg.GenerateVCXProjFilters( projectFileName ); float time = t.GetElapsed(); OUTPUT( "Gen vcxproj.filters: %2.3fs\n", time ); } }
REGISTER_TESTS_END

// TestMSVCPreprocessedOutput
//------------------------------------------------------------------------------
// Loads a pre-processed MSVC translation unit from disk, parses it repeatedly
// with CIncludeParser, checks the include counts against known values and
// prints the elapsed time and throughput.
//------------------------------------------------------------------------------
void TestIncludeParser::TestMSVCPreprocessedOutput() const
{
    // read the whole file into a null-terminated buffer
    FileStream f;
    TEST_ASSERT( f.Open( "Data/TestIncludeParser/fbuildcore.msvc.ii", FileStream::READ_ONLY ) );
    const size_t fileSize = (size_t)f.GetFileSize();
    AutoPtr< char > mem( (char *)ALLOC( fileSize + 1 ) );
    TEST_ASSERT( f.Read( mem.Get(), fileSize ) == fileSize );
    mem.Get()[ fileSize ] = 0;

    Timer t;

    const size_t repeatCount( 100 );
    size_t pass = 0;
    while ( pass < repeatCount )
    {
        CIncludeParser parser;
        TEST_ASSERT( parser.ParseMSCL_Preprocessed( mem.Get(), fileSize ) );

        // the expected counts are pinned to catch future regressions
        const Array< AString > & includes = parser.GetIncludes();
        TEST_ASSERT( includes.GetSize() == 284 );
        #ifdef DEBUG
            TEST_ASSERT( parser.GetNonUniqueCount() == 381 );
        #endif

        ++pass;
    }

    const float time = t.GetElapsed();
    const float mibPerSec = ( (float)( fileSize * repeatCount / ( 1024.0f * 1024.0f ) ) / time );
    OUTPUT( "MSVC : %2.3fs (%2.1f MiB/sec)\n", time, mibPerSec );
}
// TestClangMSExtensionsPreprocessedOutput //------------------------------------------------------------------------------ void TestIncludeParser::TestClangMSExtensionsPreprocessedOutput() const { FBuild fBuild; // needed fer CleanPath for relative dirs FileStream f; TEST_ASSERT( f.Open( "Data/TestIncludeParser/fbuildcore.clang.ms-extensions.ii", FileStream::READ_ONLY) ) const size_t fileSize = (size_t)f.GetFileSize(); AutoPtr< char > mem( (char *)ALLOC( fileSize + 1 ) ); TEST_ASSERT( f.Read( mem.Get(), fileSize ) == fileSize ); mem.Get()[ fileSize ] = 0; Timer t; const size_t repeatCount( 100 ); for ( size_t i=0; i<repeatCount; ++i ) { CIncludeParser parser; TEST_ASSERT( parser.ParseGCC_Preprocessed( mem.Get(), fileSize ) ); // check number of includes found to prevent future regressions const Array< AString > & includes = parser.GetIncludes(); TEST_ASSERT( includes.GetSize() == 285 ); #ifdef DEBUG TEST_ASSERT( parser.GetNonUniqueCount() == 4758 ); #endif } float time = t.GetElapsed(); OUTPUT( "Clang (ms-extensions): %2.3fs (%2.1f MiB/sec)\n", time, ( (float)( fileSize * repeatCount / ( 1024.0f * 1024.0f ) ) / time ) ); }
// Windows entry point.
//
// Initializes logging and the application, then runs the message loop until
// WM_QUIT is seen. When no message is pending, the simulation is stepped and
// rendered whenever the global timer reports a tick.
//
// Returns -1 if init_log fails, 1 if Init fails, otherwise the wParam of the
// WM_QUIT message.
int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, PSTR pScmdline, int iCmdshow)
{
    if (!init_log(NULL))
        return -1;

    if (!Init(NULL, hInstance, hPrevInstance, pScmdline, iCmdshow))
    {
        return 1;
    }

    MSG msg = {0};

    while (WM_QUIT != msg.message)
    {
        if (PeekMessage(&msg, nullptr, 0, 0, PM_REMOVE))
        {
            TranslateMessage(&msg);
            DispatchMessage(&msg);
        }
        else
        {
            // no messages waiting, step the simulation forward a frame
            if (time.Tick())
            {
                Update(time.GetElapsed());
                Render();
            }
        }
    }

    // Return this part of the WM_QUIT message to Windows. WPARAM is
    // pointer-sized, so the narrowing to int is made explicit.
    return static_cast<int>(msg.wParam);
}
/*static*/ void FileIO::WorkAroundForWindowsFilePermissionProblem( const AString & fileName ) { // Sometimes after closing a file, subsequent operations on that file will // fail. For example, trying to set the file time, or even another process // opening the file. // // This seems to be a known issue in windows, with multiple potential causes // like Virus scanners and possibly the behaviour of the kernel itself. // // A work-around for this problem is to attempt to open a file we just closed. // This will sometimes fail, but if we retry until it succeeds, we avoid the // problem on the subsequent operation. FileStream f; Timer timer; while ( f.Open( fileName.Get() ) == false ) { Thread::Sleep( 1 ); // timeout so we don't get stuck in here forever if ( timer.GetElapsed() > 1.0f ) { ASSERT( false && "WorkAroundForWindowsFilePermissionProblem Failed!" ); return; } } f.Close(); }
// AllocateFromSystemAllocator //------------------------------------------------------------------------------ /*static*/ float TestSmallBlockAllocator::AllocateFromSystemAllocator( const Array< uint32_t > & allocSizes, const uint32_t repeatCount ) { const size_t numAllocs = allocSizes.GetSize(); Array< void * > allocs( numAllocs, false ); Timer timer; for ( size_t r=0; r<repeatCount; ++r ) { // use malloc for ( uint32_t i=0; i<numAllocs; ++i ) { uint32_t * mem = (uint32_t *)malloc( allocSizes[i] ); allocs.Append( mem ); } // use free for ( uint32_t i=0; i<numAllocs; ++i ) { void * mem = allocs[ i ]; free( mem ); } allocs.Clear(); } return timer.GetElapsed(); }
int LaunchSubProcess( const AString & args ) { // try to make a copy of our exe AStackString<> exeName; Env::GetExePath( exeName ); AStackString<> exeNameCopy( exeName ); exeNameCopy += ".copy"; Timer t; while ( FileIO::FileCopy( exeName.Get(), exeNameCopy.Get() ) == false ) { if ( t.GetElapsed() > 5.0f ) { AStackString<> msg; msg.Format( "Failed to make sub-process copy - error: %u (0x%x)\n\nSrc: %s\nDst: %s\n", Env::GetLastErr(), Env::GetLastErr(), exeName.Get(), exeNameCopy.Get() ); ShowMsgBox( msg.Get() ); return -2; } Thread::Sleep( 100 ); } AStackString<> argsCopy( args ); argsCopy += " -subprocess"; // allow subprocess to access the mutex g_OneProcessMutex.Unlock(); Process p; #if defined( __WINDOWS__ ) p.DisableHandleRedirection(); // TODO:MAC TODO:LINUX is this needed? #endif p.Spawn( exeNameCopy.Get(), argsCopy.Get(), nullptr, nullptr ); p.Detach(); return 0; }
REGISTER_TESTS_END

// Validate
//------------------------------------------------------------------------------
// Sleeps for 100ms with a running Timer and checks that the raw counter
// advanced and that both elapsed-time accessors report a plausible duration.
//------------------------------------------------------------------------------
void TestTimer::Validate() const
{
    Timer t;
    t.Start();

    // sample the timer around a 100ms sleep
    const uint64_t nowBefore = t.GetNow();
    Thread::Sleep( 100 );
    const float seconds = t.GetElapsed();
    const float milliseconds = t.GetElapsedMS();
    const uint64_t nowAfter = t.GetNow();

    // the raw counter must have moved forward
    TEST_ASSERT( nowAfter > nowBefore );

    // seconds: at least 1ms, and below some sensible upper bound
    TEST_ASSERT( seconds >= 0.001f );
    TEST_ASSERT( seconds < 1.000f );

    // milliseconds: same bounds, expressed in ms
    TEST_ASSERT( milliseconds >= 1.0f );
    TEST_ASSERT( milliseconds < 1000.0f );
}
//------------------------------------------------------------------------------ void Solver::Advance (float timeStep) { Timer t; t.Start(); updateBlendValues(); mFluidHashTable[LOW]->Fill(mFluidParticles[LOW]->ActiveIDs); mFluidHashTable[HIGH]->Fill(mFluidParticles[HIGH]->ActiveIDs); computeDensity(LOW); computeDensity(HIGH); computeAcceleration(LOW); computeAcceleration(HIGH); integrate(HIGH, timeStep/2.0f); mFluidHashTable[HIGH]->Fill(mFluidParticles[HIGH]->ActiveIDs); computeDensity(HIGH); computeAcceleration(HIGH); integrate(HIGH, timeStep/2.0f); integrate(LOW, timeStep); inject(); t.Stop(); std::cout << "#LOW " << mFluidParticles[LOW]->ActiveIDs.size() << " #HIGH " << mFluidParticles[HIGH]->ActiveIDs.size() << " #TOTAL: " << mFluidParticles[LOW]->ActiveIDs.size() + mFluidParticles[HIGH]->ActiveIDs.size() << "TIME: " << t.GetElapsed() << std::endl; }
// Generate //------------------------------------------------------------------------------ void Report::Generate( const FBuildStats & stats ) { Timer t; // pre-allocate a large string for output m_Output.SetReserved( MEGABYTE ); m_Output.SetLength( 0 ); // generate some common data used in reporting GetLibraryStats( stats ); // build the report CreateHeader(); CreateTitle(); CreateOverview( stats ); DoCPUTimeByType( stats ); DoCacheStats( stats ); DoCPUTimeByLibrary(); DoCPUTimeByItem( stats ); DoIncludes(); CreateFooter(); // patch in time take const float time = t.GetElapsed(); AStackString<> timeTakenBuffer; stats.FormatTime( time, timeTakenBuffer ); char * placeholder = m_Output.Find( "^^^^ " ); memcpy( placeholder, timeTakenBuffer.Get(), timeTakenBuffer.GetLength() ); }
// Reports whether the elapsed time of T has passed the deadline stored in *t.
// When it has, the deadline is pushed forward by period and true is returned;
// otherwise *t is left untouched and false is returned.
bool Timer::Period(Timer& T, double* t, double period)
{
    const bool due = (T.GetElapsed() - *t > 0);
    if (due)
    {
        *t += period;
    }
    return due;
}
// WaitTimeout //------------------------------------------------------------------------------ void TestSemaphore::WaitTimeout() const { Timer t; Semaphore s; s.Wait( 50 ); // wait 50ms // ensure some sensible time has elapsed ASSERT( t.GetElapsed() > 0.025f ); // 25ms (allow wide margin of error) }
// AllocateFromSmallBlockAllocator //------------------------------------------------------------------------------ /*static*/ float TestSmallBlockAllocator::AllocateFromSmallBlockAllocator( const Array< uint32_t > & allocSizes, const uint32_t repeatCount, const bool threadSafe ) { const size_t numAllocs = allocSizes.GetSize(); Array< void * > allocs( numAllocs, false ); Timer timer; if ( threadSafe == false ) { SmallBlockAllocator::SetSingleThreadedMode( true ); } for ( size_t r=0; r<repeatCount; ++r ) { // Use ALLOC for ( uint32_t i=0; i<numAllocs; ++i ) { uint32_t * mem = (uint32_t *)ALLOC( allocSizes[i] ); allocs.Append( mem ); } // Use FREE for ( uint32_t i=0; i<numAllocs; ++i ) { void * mem = allocs[ i ]; FREE( mem ); } allocs.Clear(); } if ( threadSafe == false ) { SmallBlockAllocator::SetSingleThreadedMode( false ); } return timer.GetElapsed(); }
//================================================================================================================================= /// The main function. //================================================================================================================================= int main(int argc, char* argv[]) { // initialize settings to defaults TootleSettings settings; settings.pMeshName = NULL; settings.pViewpointName = NULL; settings.nClustering = 0; settings.nCacheSize = TOOTLE_DEFAULT_VCACHE_SIZE; settings.eWinding = TOOTLE_CW; settings.algorithmChoice = TOOTLE_OPTIMIZE; settings.eVCacheOptimizer = TOOTLE_VCACHE_AUTO; // the auto selection as the default to optimize vertex cache settings.bOptimizeVertexMemory = true; // default value is to optimize the vertex memory settings.bMeasureOverdraw = true; // default is to measure overdraw // parse the command line ParseCommandLine(argc, argv, &settings); // *************************************************** // Load the mesh // *************************************************** // read the mesh from the OBJ file std::vector<ObjVertexFinal> objVertices; std::vector<ObjFace> objFaces; ObjLoader loader; if (!loader.LoadGeometry(settings.pMeshName, objVertices, objFaces)) { std::cerr << "Error loading mesh file: " << settings.pMeshName << std::endl; return 1; } // build buffers containing only the vertex positions and indices, since this is what Tootle requires std::vector<ObjVertex3D> vertices; vertices.resize(objVertices.size()); for (unsigned int i = 0; i < vertices.size(); i++) { vertices[i] = objVertices[i].pos; } std::vector<unsigned int> indices; indices.resize(objFaces.size() * 3); for (unsigned int i = 0; i < indices.size(); i++) { indices[i] = objFaces[ i / 3 ].finalVertexIndices[ i % 3 ]; } // ****************************************** // Load viewpoints if necessary // ****************************************** // read viewpoints if needed std::vector<ObjVertex3D> viewpoints; if 
(settings.pViewpointName != NULL) { if (!LoadViewpoints(settings.pViewpointName, viewpoints)) { std::cerr << "Unable to load viewpoints from file: " << settings.pViewpointName; return 1; } } // if we didn't get any viewpoints, then use a NULL array const float* pViewpoints = NULL; unsigned int nViewpoints = (unsigned int) viewpoints.size(); if (viewpoints.size() > 0) { pViewpoints = (const float*) &viewpoints[0]; } // ***************************************************************** // Prepare the mesh and initialize stats variables // ***************************************************************** unsigned int nFaces = (unsigned int) indices.size() / 3; unsigned int nVertices = (unsigned int) vertices.size(); float* pfVB = (float*) &vertices[0]; unsigned int* pnIB = (unsigned int*) &indices[0]; unsigned int nStride = 3 * sizeof(float); TootleStats stats; // initialize the timing variables stats.fOptimizeVCacheTime = INVALID_TIME; stats.fClusterMeshTime = INVALID_TIME; stats.fVCacheClustersTime = INVALID_TIME; stats.fOptimizeVCacheAndClusterMeshTime = INVALID_TIME; stats.fOptimizeOverdrawTime = INVALID_TIME; stats.fTootleOptimizeTime = INVALID_TIME; stats.fTootleFastOptimizeTime = INVALID_TIME; stats.fMeasureOverdrawTime = INVALID_TIME; stats.fOptimizeVertexMemoryTime = INVALID_TIME; TootleResult result; // initialize Tootle result = TootleInit(); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } // measure input VCache efficiency result = TootleMeasureCacheEfficiency(pnIB, nFaces, settings.nCacheSize, &stats.fVCacheIn); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } if (settings.bMeasureOverdraw) { // measure input overdraw. Note that we assume counter-clockwise vertex winding. 
result = TootleMeasureOverdraw(pfVB, pnIB, nVertices, nFaces, nStride, pViewpoints, nViewpoints, settings.eWinding, &stats.fOverdrawIn, &stats.fMaxOverdrawIn); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } } // allocate an array to hold the cluster ID for each face std::vector<unsigned int> faceClusters; faceClusters.resize(nFaces + 1); unsigned int nNumClusters; Timer timer; timer.Reset(); // ********************************************************************************************************************** // Optimize the mesh: // // The following cases show five examples for developers on how to use the library functions in Tootle. // 1. If you are interested in optimizing vertex cache only, see the TOOTLE_VCACHE_ONLY case. // 2. If you are interested to optimize vertex cache and overdraw, see either TOOTLE_CLUSTER_VCACHE_OVERDRAW // or TOOTLE_OPTIMIZE cases. The former uses three separate function calls while the latter uses a single // utility function. // 3. To use the algorithm from SIGGRAPH 2007 (v2.0), see TOOTLE_FAST_VCACHECLUSTER_OVERDRAW or TOOTLE_FAST_OPTIMIZE // cases. The former uses two separate function calls while the latter uses a single utility function. // // Note the algorithm from SIGGRAPH 2007 (v2.0) is very fast but produces less quality results especially for the // overdraw optimization. During our experiments with some medium size models, we saw an improvement of 1000x in // running time (from 20+ minutes to less than 1 second) for using v2.0 calls vs v1.2 calls. The resulting vertex // cache optimization is very similar while the overdraw optimization drops from 3.8x better to 2.5x improvement over // the input mesh. // Developers should always run the overdraw optimization using the fast algorithm from SIGGRAPH initially. // If they require a better result, then re-run the overdraw optimization using the old v1.2 path (TOOTLE_OVERDRAW_AUTO). 
// Passing TOOTLE_OVERDRAW_AUTO to the algorithm will let the algorithm choose between Direct3D or raytracing path // depending on the total number of clusters (less than 200 clusters, it will use Direct3D path otherwise it will // use raytracing path since the raytracing path will be faster than the Direct3D path at that point). // // Tips: use v2.0 for fast optimization, and v1.2 to further improve the result by mix-matching the calls. // ********************************************************************************************************************** switch (settings.algorithmChoice) { case TOOTLE_VCACHE_ONLY: // ******************************************************************************************************************* // Perform Vertex Cache Optimization ONLY // ******************************************************************************************************************* stats.nClusters = 1; // Optimize vertex cache result = TootleOptimizeVCache(pnIB, nFaces, nVertices, settings.nCacheSize, pnIB, NULL, settings.eVCacheOptimizer); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fOptimizeVCacheTime = timer.GetElapsed(); break; case TOOTLE_CLUSTER_VCACHE_OVERDRAW: // ******************************************************************************************************************* // An example of calling clustermesh, vcacheclusters and optimize overdraw individually. // This case demonstrate mix-matching v1.2 clustering with v2.0 overdraw optimization. // ******************************************************************************************************************* // Cluster the mesh, and sort faces by cluster. 
result = TootleClusterMesh(pfVB, pnIB, nVertices, nFaces, nStride, settings.nClustering, pnIB, &faceClusters[0], NULL); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fClusterMeshTime = timer.GetElapsed(); timer.Reset(); // The last entry of of faceClusters store the total number of clusters. stats.nClusters = faceClusters[ nFaces ]; // Perform vertex cache optimization on the clustered mesh. result = TootleVCacheClusters(pnIB, nFaces, nVertices, settings.nCacheSize, &faceClusters[0], pnIB, NULL, settings.eVCacheOptimizer); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fVCacheClustersTime = timer.GetElapsed(); timer.Reset(); // Optimize the draw order (using v1.2 path: TOOTLE_OVERDRAW_AUTO, the default path is from v2.0--SIGGRAPH version). result = TootleOptimizeOverdraw(pfVB, pnIB, nVertices, nFaces, nStride, pViewpoints, nViewpoints, settings.eWinding, &faceClusters[0], pnIB, NULL, TOOTLE_OVERDRAW_AUTO); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fOptimizeOverdrawTime = timer.GetElapsed(); break; case TOOTLE_FAST_VCACHECLUSTER_OVERDRAW: // ******************************************************************************************************************* // An example of calling v2.0 optimize vertex cache and clustering mesh with v1.2 overdraw optimization. 
// ******************************************************************************************************************* // Optimize vertex cache and create cluster // The algorithm from SIGGRAPH combine the vertex cache optimization and clustering mesh into a single step result = TootleFastOptimizeVCacheAndClusterMesh(pnIB, nFaces, nVertices, settings.nCacheSize, pnIB, &faceClusters[0], &nNumClusters, TOOTLE_DEFAULT_ALPHA); if (result != TOOTLE_OK) { // an error detected DisplayTootleErrorMessage(result); return 1; } stats.fOptimizeVCacheAndClusterMeshTime = timer.GetElapsed(); timer.Reset(); stats.nClusters = nNumClusters; // In this example, we use TOOTLE_OVERDRAW_AUTO to show that we can mix-match the clustering and // vcache computation from the new library with the overdraw optimization from the old library. // TOOTLE_OVERDRAW_AUTO will choose between using Direct3D or CPU raytracing path. This path is // much slower than TOOTLE_OVERDRAW_FAST but usually produce 2x better results. result = TootleOptimizeOverdraw(pfVB, pnIB, nVertices, nFaces, nStride, NULL, 0, settings.eWinding, &faceClusters[0], pnIB, NULL, TOOTLE_OVERDRAW_AUTO); if (result != TOOTLE_OK) { // an error detected DisplayTootleErrorMessage(result); return 1; } stats.fOptimizeOverdrawTime = timer.GetElapsed(); break; case TOOTLE_OPTIMIZE: // ******************************************************************************************************************* // An example of using a single utility function to perform v1.2 optimizations. // ******************************************************************************************************************* // This function will compute the entire optimization (cluster mesh, vcache per cluster, and optimize overdraw). 
// It will use TOOTLE_OVERDRAW_FAST as the default overdraw optimization result = TootleOptimize(pfVB, pnIB, nVertices, nFaces, nStride, settings.nCacheSize, pViewpoints, nViewpoints, settings.eWinding, pnIB, &nNumClusters, settings.eVCacheOptimizer); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fTootleOptimizeTime = timer.GetElapsed(); stats.nClusters = nNumClusters; break; case TOOTLE_FAST_OPTIMIZE: // ******************************************************************************************************************* // An example of using a single utility function to perform v2.0 optimizations. // ******************************************************************************************************************* // This function will compute the entire optimization (optimize vertex cache, cluster mesh, and optimize overdraw). // It will use TOOTLE_OVERDRAW_FAST as the default overdraw optimization result = TootleFastOptimize(pfVB, pnIB, nVertices, nFaces, nStride, settings.nCacheSize, settings.eWinding, pnIB, &nNumClusters, TOOTLE_DEFAULT_ALPHA); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fTootleFastOptimizeTime = timer.GetElapsed(); stats.nClusters = nNumClusters; break; default: // wrong algorithm choice break; } // measure output VCache efficiency result = TootleMeasureCacheEfficiency(pnIB, nFaces, settings.nCacheSize, &stats.fVCacheOut); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } if (settings.bMeasureOverdraw) { // measure output overdraw timer.Reset(); result = TootleMeasureOverdraw(pfVB, pnIB, nVertices, nFaces, nStride, pViewpoints, nViewpoints, settings.eWinding, &stats.fOverdrawOut, &stats.fMaxOverdrawOut); stats.fMeasureOverdrawTime = timer.GetElapsed(); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } } //----------------------------------------------------------------------------------------------------- // PERFORM VERTEX 
MEMORY OPTIMIZATION (rearrange memory layout for vertices based on the final indices // to exploit vertex cache prefetch). // We want to optimize the vertex memory locations based on the final optimized index buffer that will // be in the output file. // Thus, in this sample code, we recompute a copy of the indices that point to the original vertices // (pnIBTmp) to be passed into the function TootleOptimizeVertexMemory. If we use the array pnIB, we // will optimize for the wrong result since the array pnIB is based on the rehashed vertex location created // by the function ObjLoader. //----------------------------------------------------------------------------------------------------- timer.Reset(); std::vector<unsigned int> pnVertexRemapping; unsigned int nReferencedVertices = 0; // The actual total number of vertices referenced by the indices if (settings.bOptimizeVertexMemory) { std::vector<unsigned int> pnIBTmp; pnIBTmp.resize(nFaces * 3); // compute the indices to be optimized for (the original pointed by the obj file). for (unsigned int i = 0; i < indices.size(); i += 3) { for (int j = 0; j < 3; j++) { const ObjVertexFinal& rVertex = objVertices[ pnIB[ i + j ] ]; pnIBTmp[ i + j ] = rVertex.nVertexIndex - 1; // index is off by 1 // compute the max vertices if (rVertex.nVertexIndex > nReferencedVertices) { nReferencedVertices = rVertex.nVertexIndex; } } } pnVertexRemapping.resize(nReferencedVertices); // For this sample code, we are just going to use vertexRemapping array result. This is to support general obj // file input and output. // In fact, we are sending the wrong vertex buffer here (it should be based on the original file instead of the // rehashed vertices). But, it is ok because we do not request the reordered vertex buffer as an output. 
result = TootleOptimizeVertexMemory(pfVB, &pnIBTmp[0], nReferencedVertices, nFaces, nStride, NULL, NULL, &pnVertexRemapping[0]); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fOptimizeVertexMemoryTime = timer.GetElapsed(); } // clean up tootle TootleCleanup(); // print tootle statistics to stdout and stderr // display the current test case PrintAlgorithm(stderr, settings.eVCacheOptimizer, settings.algorithmChoice, settings.nCacheSize, stats.nClusters); PrintAlgorithm(stdout, settings.eVCacheOptimizer, settings.algorithmChoice, settings.nCacheSize, stats.nClusters); PrintStats(stdout, &stats); PrintStats(stderr, &stats); // emit a modified .OBJ file std::ifstream inputStream(settings.pMeshName); bool bResult; if (settings.bOptimizeVertexMemory) { bResult = EmitModifiedObj(inputStream, std::cout, objVertices, indices, &pnVertexRemapping[0], nReferencedVertices); } else { bResult = EmitModifiedObj(inputStream, std::cout, objVertices, indices, NULL, 0); } if (bResult) { return 1; } return 0; }
// CompareHashTimes_Small //------------------------------------------------------------------------------ void TestHash::CompareHashTimes_Small() const { // some different strings to hash Array< AString > strings( 32, true ); strings.Append( AString( " " ) ); strings.Append( AString( "short" ) ); strings.Append( AString( "mediumstringmediumstring123456789" ) ); strings.Append( AString( "longstring_98274ncoif834jodhiorhmwe8r8wy48on87h8mhwejrijrdierwurd9j,8chm8hiuorciwriowjri" ) ); strings.Append( AString( "c:\\files\\subdir\\project\\thing\\stuff.cpp" ) ); const size_t numStrings = strings.GetSize(); const size_t numIterations = 102400; // calc datasize size_t dataSize( 0 ); for ( size_t i=0; i<numStrings; ++i ) { dataSize += strings[ i ].GetLength(); } dataSize *= numIterations; // xxHash - 32 { Timer t; uint32_t crc( 0 ); for ( size_t j=0; j<numIterations; ++j ) { for ( size_t i=0; i<numStrings; ++i ) { crc += xxHash::Calc32( strings[ i ].Get(), strings[ i ].GetLength() ); } } float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "xxHash-32 : %2.3fs @ %6.3f GiB/s (hash: 0x%x)\n", time, speed, crc ); } // xxHash - 64 { Timer t; uint64_t crc( 0 ); for ( size_t j=0; j<numIterations; ++j ) { for ( size_t i=0; i<numStrings; ++i ) { crc += xxHash::Calc64( strings[ i ].Get(), strings[ i ].GetLength() ); } } float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "xxHash-64 : %2.3fs @ %6.3f GiB/s (hash: %016llx)\n", time, speed, crc ); } // Murmur3 - 32 { Timer t; uint32_t crc( 0 ); for ( size_t j=0; j<numIterations; ++j ) { for ( size_t i=0; i<numStrings; ++i ) { crc += Murmur3::Calc32( strings[ i ].Get(), strings[ i ].GetLength() ); } } float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "Murmur3-32 : %2.3fs @ %6.3f GiB/s (hash: 0x%x)\n", time, speed, crc ); } // Murmur3 - 128 { Timer t; uint64_t 
hashB( 0 ); uint64_t hashA( 0 ); for ( size_t j=0; j<numIterations; ++j ) { for ( size_t i=0; i<numStrings; ++i ) { hashA += Murmur3::Calc128( strings[ i ].Get(), strings[ i ].GetLength(), hashB ); } } float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "Murmur3-128 : %2.3fs @ %6.3f GiB/s (%016llx, %016llx)\n", time, speed, hashA, hashB ); } // CRC32 - 8x8 slicing { Timer t; uint32_t crc( 0 ); for ( size_t j=0; j<numIterations; ++j ) { for ( size_t i=0; i<numStrings; ++i ) { crc += CRC32::Calc( strings[ i ].Get(), strings[ i ].GetLength() ); } } float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "CRC32 8x8 : %2.3fs @ %6.3f GiB/s (hash: 0x%x)\n", time, speed, crc ); } // CRC32 - "standard" algorithm { Timer t; uint32_t crc( 0 ); for ( size_t j=0; j<numIterations; ++j ) { for ( size_t i=0; i<numStrings; ++i ) { crc += CRC32::Start(); crc += CRC32::Update( crc, strings[ i ].Get(), strings[ i ].GetLength() ); crc += CRC32::Stop( crc ); } } float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "CRC32 : %2.3fs @ %6.3f GiB/s (hash: 0x%x)\n", time, speed, crc ); } // CRC32Lower { Timer t; uint32_t crc( 0 ); for ( size_t j=0; j<numIterations; ++j ) { for ( size_t i=0; i<numStrings; ++i ) { crc += CRC32::CalcLower( strings[ i ].Get(), strings[ i ].GetLength() ); } } float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "CRC32Lower : %2.3fs @ %6.3f GiB/s (hash: 0x%x)\n", time, speed, crc ); } // Murmur3 - 32 Lower { Timer t; // lower-case and hash it uint32_t crc( 0 ); for ( size_t j=0; j<numIterations; ++j ) { for ( size_t i=0; i<numStrings; ++i ) { const AString & str( strings[ i ] ); AStackString<> tmp; const size_t strLen( str.GetLength() ); tmp.SetLength( (uint32_t)strLen ); for ( size_t k=0; k<strLen; ++k ) { char c = str[ (uint32_t)k ]; 
tmp[ (uint32_t)k ] = ( ( c >= 'A' ) && ( c <= 'Z' ) ) ? 'a' + ( c - 'A' ) : c; } crc += Murmur3::Calc32( tmp.Get(), tmp.GetLength() ); } } float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "Murmur3-32-Lower: %2.3fs @ %6.3f GiB/s (hash: 0x%x)\n", time, speed, crc ); } }
int main(int argc, char *argv[]) { TestStructs(); #ifdef _CELL InitPPECallbacks(); InitSPEs(); #endif Init(); if(parseArgs(argc, argv) != 0) return 1; /* Init framebuffer and Scene */ printf("lzrt %ix%i (" ARCH_STR ")\n", args.width, args.height); ImageBuffer *imgbuf = new FrameBuffer(args.width, args.height, args.fullscreen, "lzrt %ix%i (" ARCH_STR ")", args.width, args.height); Scene scene(imgbuf, args.animate, args.renderer); // Execute Lua script which sets up the scene lua_State *L = 0; L = InitLua(); if(lua_dofile(L, args.luascript) != 0) { printf("Error: Couldn't execute Lua script '%s'\n", args.luascript); lua_close(L); return 1; } threadpool = new ThreadPool(args.nthreads); // Setup Scene scene.Setup(threadpool); scene.imgbuf->SetClearColor(0.2f, 0.2f, 0.3f); //scene.imgbuf->SetClearColor(1.0f, 1.0f, 1.0f); Timer timer; while(run) { SDLevent(); threadpool->SetNumThreads(args.nthreads); timer.Mark(); // Call Lua loop function if(L != 0 && scene.animate) call_function<void>(L, "lzrtloop"); scene.camera->Move(firstframe, camerapos.x, camerapos.y, camerapos.z); scene.imgbuf->Clear(); scene.Render(threadpool, args.njobs); args.njobs = scene.njobs; rendertime = timer.GetElapsed(); PrintStats(imgbuf, &scene); if(scene.animate || firstframe) frame++; imgbuf->Update(); firstframe = false; } }
// client TCPConnectionPool client; const ConnectionInfo * ci = client.Connect( AStackString<>( "127.0.0.1" ), testPort ); TEST_ASSERT( ci ); size_t sendSize = 31; while ( sendSize <= maxSendSize ) { server.m_ReceivedBytes = 0; server.m_DataSize = sendSize; Timer timer; size_t totalSent = 0; while ( timer.GetElapsed() < 0.1f ) { // client sends some know data to the server TEST_ASSERT( client.Send( ci, data.Get(), sendSize ) ); totalSent += sendSize; } while( server.m_ReceivedBytes < totalSent ) { Thread::Sleep( 1 ); } const float speedMBs = ( float( totalSent ) / timer.GetElapsed() ) / float( 1024 * 1024 ); OUTPUT( "Speed: %2.1f MiB/s, SendSize: %u\n", speedMBs, (uint32_t)sendSize ); sendSize = ( sendSize * 2 ) + 33; // +33 to avoid powers of 2
// MainCommon //------------------------------------------------------------------------------ int MainCommon( const AString & args, void * hInstance ) { // don't buffer output VERIFY( setvbuf(stdout, nullptr, _IONBF, 0) == 0 ); VERIFY( setvbuf(stderr, nullptr, _IONBF, 0) == 0 ); // process cmd line args FBuildWorkerOptions options; if ( options.ProcessCommandLine( args ) == false ) { return -3; } // only allow 1 worker per system Timer t; while ( g_OneProcessMutex.TryLock() == false ) { // retry for upto 2 seconds, to allow some time for old worker to close if ( t.GetElapsed() > 5.0f ) { ShowMsgBox( "An FBuildWorker is already running!" ); return -1; } Thread::Sleep(100); } #if defined( __WINDOWS__ ) if ( options.m_UseSubprocess && !options.m_IsSubprocess ) { return LaunchSubProcess( args ); } #endif // prevent popups when launching tools with missing dlls #if defined( __WINDOWS__ ) ::SetErrorMode( SEM_FAILCRITICALERRORS ); #else // TODO:MAC SetErrorMode equivalent // TODO:LINUX SetErrorMode equivalent #endif #if defined( __WINDOWS__ ) VERIFY( SetPriorityClass( GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS ) ); #else // TODO:MAC SetPriorityClass equivalent // TODO:LINUX SetPriorityClass equivalent #endif // start the worker and wait for it to be closed int ret; { Worker worker( hInstance, args ); if ( options.m_OverrideCPUAllocation ) { WorkerSettings::Get().SetNumCPUsToUse( options.m_CPUAllocation ); } if ( options.m_OverrideWorkMode ) { WorkerSettings::Get().SetMode( options.m_WorkMode ); } ret = worker.Work(); } MEMTRACKER_DUMP_ALLOCATIONS return ret; }
REGISTER_TESTS_END // CompareHashTimes //------------------------------------------------------------------------------ void TestHash::CompareHashTimes_Large() const { // use pseudo-random (but deterministic) data const uint32_t seed = 0xB1234567; Random r( seed ); // fill a buffer to use for tests const size_t dataSize( 64 * 1024 * 1024 ); AutoPtr< uint32_t > data( (uint32_t *)ALLOC( dataSize ) ); for ( size_t i=0; i<dataSize / sizeof( uint32_t ); ++i ) { data.Get()[ i ] = r.GetRand(); } // baseline - sum 64 bits { Timer t; uint64_t sum( 0 ); uint64_t * it = (uint64_t *)data.Get(); uint64_t * end = it + ( dataSize / sizeof( uint64_t ) ); while ( it != end ) { sum += *it; ++it; } float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "Sum64 : %2.3fs @ %2.3f GiB/s (sum: %016llx)\n", time, speed, sum ); } // baseline - sum 32 bits { Timer t; uint32_t sum( 0 ); uint32_t * it = data.Get(); uint32_t * end = it + ( dataSize / sizeof( uint32_t ) ); while ( it != end ) { sum += *it; ++it; } float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "Sum32 : %2.3fs @ %6.3f GiB/s (sum: 0x%x)\n", time, speed, sum ); } // xxHash32 { Timer t; uint32_t crc = xxHash::Calc32( data.Get(), dataSize ); float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "xxHash-32 : %2.3fs @ %6.3f GiB/s (hash: 0x%x)\n", time, speed, crc ); } // xxHash64 { Timer t; uint64_t crc = xxHash::Calc64( data.Get(), dataSize ); float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "xxHash-64 : %2.3fs @ %6.3f GiB/s (hash: %016llx)\n", time, speed, crc ); } // Murmur3 - 32 { Timer t; uint32_t crc = Murmur3::Calc32( data.Get(), dataSize ); float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "Murmur3-32 : %2.3fs @ %6.3f GiB/s (hash: 
0x%x)\n", time, speed, crc ); } // Murmur3 - 128 { Timer t; uint64_t hashB( 0 ); uint64_t hashA = Murmur3::Calc128( data.Get(), dataSize, hashB ); float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "Murmur3-128 : %2.3fs @ %6.3f GiB/s (%016llx, %016llx)\n", time, speed, hashA, hashB ); } // CRC32 - 8x8 slicing { Timer t; uint32_t crc = CRC32::Calc( data.Get(), dataSize ); float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "CRC32 8x8 : %2.3fs @ %6.3f GiB/s (hash: 0x%x)\n", time, speed, crc ); } // CRC32 - "standard" algorithm { Timer t; uint32_t crc = CRC32::Start(); crc = CRC32::Update( crc, data.Get(), dataSize ); crc = CRC32::Stop( crc ); float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "CRC32 : %2.3fs @ %6.3f GiB/s (hash: 0x%x)\n", time, speed, crc ); } // CRC32Lower { Timer t; uint32_t crc = CRC32::CalcLower( data.Get(), dataSize ); float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "CRC32Lower : %2.3fs @ %6.3f GiB/s (hash: 0x%x)\n", time, speed, crc ); } // Murmur3 - 32 Lower { Timer t; // lower-case the data into a copy AutoPtr< uint32_t > dataCopy( (uint32_t *)ALLOC( dataSize ) ); const char * src = (const char * )data.Get(); const char * end( src + ( dataSize / sizeof( uint32_t ) ) ); char * dst = (char *)dataCopy.Get(); while ( src < end ) { char c = *src; *dst = ( ( c >= 'A' ) && ( c <= 'Z' ) ) ? 'a' + c - 'A' : c ; ++src; ++dst; } // hash it uint32_t crc = Murmur3::Calc32( dataCopy.Get(), dataSize ); float time = t.GetElapsed(); float speed = ( (float)dataSize / (float)( 1024 * 1024 * 1024 ) ) / time; OUTPUT( "Murmur3-32-Lower: %2.3fs @ %6.3f GiB/s (hash: 0x%x)\n", time, speed, crc ); } }
// CompressHelper //------------------------------------------------------------------------------ void TestCompressor::CompressHelper( const char * fileName ) const { // read some test data into a file AutoPtr< void > data; size_t dataSize; { FileStream fs; TEST_ASSERT( fs.Open( fileName ) ); dataSize = (size_t)fs.GetFileSize(); data = (char *)ALLOC( dataSize ); TEST_ASSERT( (uint32_t)fs.Read( data.Get(), dataSize ) == dataSize ); } OUTPUT( "File : %s\n", fileName ); OUTPUT( "Size : %u\n", (uint32_t)dataSize ); // compress the data to obtain size Compressor comp; comp.Compress( data.Get(), dataSize ); size_t compressedSize = comp.GetResultSize(); AutoPtr< char > compressedData( (char *)ALLOC( compressedSize ) ); memcpy( compressedData.Get(), comp.GetResult(), compressedSize ); float compressedPerc = ( (float)compressedSize / (float)dataSize ) * 100.0f; OUTPUT( "Compressed Size: %u (%2.1f%% of original)\n", (uint32_t)compressedSize, compressedPerc ); // decompress to check we get original data back Compressor decomp; decomp.Decompress( compressedData.Get() ); size_t uncompressedSize = decomp.GetResultSize(); TEST_ASSERT( uncompressedSize == dataSize ); for ( size_t i=0; i<uncompressedSize; ++i ) { TEST_ASSERT( ( (char *)data.Get() )[ i ] == ( (char *)decomp.GetResult() )[ i ] ); } // speed checks //-------------- const size_t NUM_REPEATS( 100 ); // compress the data several times to get more stable throughput value Timer t; for ( size_t i=0; i<NUM_REPEATS; ++i ) { Compressor c; c.Compress( data.Get(), dataSize ); TEST_ASSERT( c.GetResultSize() == compressedSize ); } float compressTimeTaken = t.GetElapsed(); double compressThroughputMBs = ( ( (double)dataSize / 1024.0 * (double)NUM_REPEATS ) / compressTimeTaken ) / 1024.0; OUTPUT( " Comp Speed: %2.1f MB/s - %2.3fs (%u repeats)\n", (float)compressThroughputMBs, compressTimeTaken, NUM_REPEATS ); // decompress the data Timer t2; for ( size_t i=0; i<NUM_REPEATS; ++i ) { Compressor d; d.Decompress( compressedData.Get() 
); TEST_ASSERT( d.GetResultSize() == dataSize ); } float decompressTimeTaken = t2.GetElapsed(); double decompressThroughputMBs = ( ( (double)dataSize / 1024.0 * (double)NUM_REPEATS ) / decompressTimeTaken ) / 1024.0; OUTPUT( " Decomp Speed: %2.1f MB/s - %2.3fs (%u repeats)\n", (float)decompressThroughputMBs, decompressTimeTaken, NUM_REPEATS ); // time memcpy to compare with Timer t0; for ( size_t i=0; i<NUM_REPEATS; ++i ) { char * mem = (char *)ALLOC( dataSize ); memcpy( mem, data.Get(), dataSize ); FREE( mem ); } float memcpyTimeTaken = t0.GetElapsed(); double memcpyThroughputMBs = ( ( (double)dataSize / 1024.0 * (double)NUM_REPEATS ) / memcpyTimeTaken ) / 1024.0; OUTPUT( " MemCpy Speed: %2.1f MB/s - %2.3fs (%u repeats)\n", (float)memcpyThroughputMBs, memcpyTimeTaken, NUM_REPEATS ); }