//================================================================================================================================= /// A utility function to sort the materials in a mesh to minimize overdraw, without optimizing within materials. This kind of /// optimization can be very effective by itself, or it can be combined with inter-material optimization for even better results. /// This function is currently not used in the sample, it is provided for illustrative purposes only. /// /// - pTriMaterialIDs is an array containing a material index for each triangle /// - nMaterials is the number of materials in the mesh /// /// The triangles are assumed to be sorted by material, meaning that pTriMaterialIDs contains 00000...1111....2222.... and so on... /// /// \param pfVB A pointer to the vertex buffer. The pointer pVB must point to the vertex position. The vertex /// position must be a 3-component floating point value (X,Y,Z). /// \param pnIB The mesh index buffer. This must be a triangle list. The faces must be clustered /// \param nVertices The number of vertices in the mesh. This must be non-zero and less than TOOTLE_MAX_VERTICES. /// \param nFaces The number of faces in the mesh. This must be non-zero and less than TOOTLE_MAX_FACES. /// \param nVBStride The distance between successive vertices in the vertex buffer, in bytes. This must be at least /// 3*sizeof(float)./// \param pnIB /// \param pnTriMaterialIDs An array containing a material index for each triangle. /// \param nMaterials the number of materials in the mesh. /// /// \return An array containing the new material order. Element 0 in this array contains the ID of the /// material that should be drawn first, element 1 contains the second, element 2 contains the third, and so on. //================================================================================================================================= UINT* SortMaterials(const float* pfVB, const UINT* pnIB, UINT nVertices, UINT nVBStride, UINT nFaces, UINT* pnTriMaterialIDs, UINT nMaterials) { // make an array to hold the material re-mapping UINT* pnMaterialRemap = new UINT [ nMaterials ]; // optimize the draw order TootleResult result = TootleOptimizeOverdraw(pfVB, pnIB, nVertices, nFaces, nVBStride, NULL, 0, // use default viewpoints TOOTLE_CCW, pnTriMaterialIDs, // cluster IDs == material ID NULL, // do not care about re-mapped index buffer pnMaterialRemap); if (result != TOOTLE_OK) { return NULL; // uh-oh } return pnMaterialRemap; }
//================================================================================================================================= /// The main function. //================================================================================================================================= int main(int argc, char* argv[]) { // initialize settings to defaults TootleSettings settings; settings.pMeshName = NULL; settings.pViewpointName = NULL; settings.nClustering = 0; settings.nCacheSize = TOOTLE_DEFAULT_VCACHE_SIZE; settings.eWinding = TOOTLE_CW; settings.algorithmChoice = TOOTLE_OPTIMIZE; settings.eVCacheOptimizer = TOOTLE_VCACHE_AUTO; // the auto selection as the default to optimize vertex cache settings.bOptimizeVertexMemory = true; // default value is to optimize the vertex memory settings.bMeasureOverdraw = true; // default is to measure overdraw // parse the command line ParseCommandLine(argc, argv, &settings); // *************************************************** // Load the mesh // *************************************************** // read the mesh from the OBJ file std::vector<ObjVertexFinal> objVertices; std::vector<ObjFace> objFaces; ObjLoader loader; if (!loader.LoadGeometry(settings.pMeshName, objVertices, objFaces)) { std::cerr << "Error loading mesh file: " << settings.pMeshName << std::endl; return 1; } // build buffers containing only the vertex positions and indices, since this is what Tootle requires std::vector<ObjVertex3D> vertices; vertices.resize(objVertices.size()); for (unsigned int i = 0; i < vertices.size(); i++) { vertices[i] = objVertices[i].pos; } std::vector<unsigned int> indices; indices.resize(objFaces.size() * 3); for (unsigned int i = 0; i < indices.size(); i++) { indices[i] = objFaces[ i / 3 ].finalVertexIndices[ i % 3 ]; } // ****************************************** // Load viewpoints if necessary // ****************************************** // read viewpoints if needed std::vector<ObjVertex3D> viewpoints; if (settings.pViewpointName != NULL) { if (!LoadViewpoints(settings.pViewpointName, viewpoints)) { std::cerr << "Unable to load viewpoints from file: " << settings.pViewpointName; return 1; } } // if we didn't get any viewpoints, then use a NULL array const float* pViewpoints = NULL; unsigned int nViewpoints = (unsigned int) viewpoints.size(); if (viewpoints.size() > 0) { pViewpoints = (const float*) &viewpoints[0]; } // ***************************************************************** // Prepare the mesh and initialize stats variables // ***************************************************************** unsigned int nFaces = (unsigned int) indices.size() / 3; unsigned int nVertices = (unsigned int) vertices.size(); float* pfVB = (float*) &vertices[0]; unsigned int* pnIB = (unsigned int*) &indices[0]; unsigned int nStride = 3 * sizeof(float); TootleStats stats; // initialize the timing variables stats.fOptimizeVCacheTime = INVALID_TIME; stats.fClusterMeshTime = INVALID_TIME; stats.fVCacheClustersTime = INVALID_TIME; stats.fOptimizeVCacheAndClusterMeshTime = INVALID_TIME; stats.fOptimizeOverdrawTime = INVALID_TIME; stats.fTootleOptimizeTime = INVALID_TIME; stats.fTootleFastOptimizeTime = INVALID_TIME; stats.fMeasureOverdrawTime = INVALID_TIME; stats.fOptimizeVertexMemoryTime = INVALID_TIME; TootleResult result; // initialize Tootle result = TootleInit(); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } // measure input VCache efficiency result = TootleMeasureCacheEfficiency(pnIB, nFaces, settings.nCacheSize, &stats.fVCacheIn); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } if (settings.bMeasureOverdraw) { // measure input overdraw. Note that we assume counter-clockwise vertex winding. result = TootleMeasureOverdraw(pfVB, pnIB, nVertices, nFaces, nStride, pViewpoints, nViewpoints, settings.eWinding, &stats.fOverdrawIn, &stats.fMaxOverdrawIn); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } } // allocate an array to hold the cluster ID for each face std::vector<unsigned int> faceClusters; faceClusters.resize(nFaces + 1); unsigned int nNumClusters; Timer timer; timer.Reset(); // ********************************************************************************************************************** // Optimize the mesh: // // The following cases show five examples for developers on how to use the library functions in Tootle. // 1. If you are interested in optimizing vertex cache only, see the TOOTLE_VCACHE_ONLY case. // 2. If you are interested to optimize vertex cache and overdraw, see either TOOTLE_CLUSTER_VCACHE_OVERDRAW // or TOOTLE_OPTIMIZE cases. The former uses three separate function calls while the latter uses a single // utility function. // 3. To use the algorithm from SIGGRAPH 2007 (v2.0), see TOOTLE_FAST_VCACHECLUSTER_OVERDRAW or TOOTLE_FAST_OPTIMIZE // cases. The former uses two separate function calls while the latter uses a single utility function. // // Note the algorithm from SIGGRAPH 2007 (v2.0) is very fast but produces less quality results especially for the // overdraw optimization. During our experiments with some medium size models, we saw an improvement of 1000x in // running time (from 20+ minutes to less than 1 second) for using v2.0 calls vs v1.2 calls. The resulting vertex // cache optimization is very similar while the overdraw optimization drops from 3.8x better to 2.5x improvement over // the input mesh. // Developers should always run the overdraw optimization using the fast algorithm from SIGGRAPH initially. // If they require a better result, then re-run the overdraw optimization using the old v1.2 path (TOOTLE_OVERDRAW_AUTO). // Passing TOOTLE_OVERDRAW_AUTO to the algorithm will let the algorithm choose between Direct3D or raytracing path // depending on the total number of clusters (less than 200 clusters, it will use Direct3D path otherwise it will // use raytracing path since the raytracing path will be faster than the Direct3D path at that point). // // Tips: use v2.0 for fast optimization, and v1.2 to further improve the result by mix-matching the calls. // ********************************************************************************************************************** switch (settings.algorithmChoice) { case TOOTLE_VCACHE_ONLY: // ******************************************************************************************************************* // Perform Vertex Cache Optimization ONLY // ******************************************************************************************************************* stats.nClusters = 1; // Optimize vertex cache result = TootleOptimizeVCache(pnIB, nFaces, nVertices, settings.nCacheSize, pnIB, NULL, settings.eVCacheOptimizer); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fOptimizeVCacheTime = timer.GetElapsed(); break; case TOOTLE_CLUSTER_VCACHE_OVERDRAW: // ******************************************************************************************************************* // An example of calling clustermesh, vcacheclusters and optimize overdraw individually. // This case demonstrate mix-matching v1.2 clustering with v2.0 overdraw optimization. // ******************************************************************************************************************* // Cluster the mesh, and sort faces by cluster. result = TootleClusterMesh(pfVB, pnIB, nVertices, nFaces, nStride, settings.nClustering, pnIB, &faceClusters[0], NULL); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fClusterMeshTime = timer.GetElapsed(); timer.Reset(); // The last entry of of faceClusters store the total number of clusters. stats.nClusters = faceClusters[ nFaces ]; // Perform vertex cache optimization on the clustered mesh. result = TootleVCacheClusters(pnIB, nFaces, nVertices, settings.nCacheSize, &faceClusters[0], pnIB, NULL, settings.eVCacheOptimizer); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fVCacheClustersTime = timer.GetElapsed(); timer.Reset(); // Optimize the draw order (using v1.2 path: TOOTLE_OVERDRAW_AUTO, the default path is from v2.0--SIGGRAPH version). result = TootleOptimizeOverdraw(pfVB, pnIB, nVertices, nFaces, nStride, pViewpoints, nViewpoints, settings.eWinding, &faceClusters[0], pnIB, NULL, TOOTLE_OVERDRAW_AUTO); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fOptimizeOverdrawTime = timer.GetElapsed(); break; case TOOTLE_FAST_VCACHECLUSTER_OVERDRAW: // ******************************************************************************************************************* // An example of calling v2.0 optimize vertex cache and clustering mesh with v1.2 overdraw optimization. // ******************************************************************************************************************* // Optimize vertex cache and create cluster // The algorithm from SIGGRAPH combine the vertex cache optimization and clustering mesh into a single step result = TootleFastOptimizeVCacheAndClusterMesh(pnIB, nFaces, nVertices, settings.nCacheSize, pnIB, &faceClusters[0], &nNumClusters, TOOTLE_DEFAULT_ALPHA); if (result != TOOTLE_OK) { // an error detected DisplayTootleErrorMessage(result); return 1; } stats.fOptimizeVCacheAndClusterMeshTime = timer.GetElapsed(); timer.Reset(); stats.nClusters = nNumClusters; // In this example, we use TOOTLE_OVERDRAW_AUTO to show that we can mix-match the clustering and // vcache computation from the new library with the overdraw optimization from the old library. // TOOTLE_OVERDRAW_AUTO will choose between using Direct3D or CPU raytracing path. This path is // much slower than TOOTLE_OVERDRAW_FAST but usually produce 2x better results. result = TootleOptimizeOverdraw(pfVB, pnIB, nVertices, nFaces, nStride, NULL, 0, settings.eWinding, &faceClusters[0], pnIB, NULL, TOOTLE_OVERDRAW_AUTO); if (result != TOOTLE_OK) { // an error detected DisplayTootleErrorMessage(result); return 1; } stats.fOptimizeOverdrawTime = timer.GetElapsed(); break; case TOOTLE_OPTIMIZE: // ******************************************************************************************************************* // An example of using a single utility function to perform v1.2 optimizations. // ******************************************************************************************************************* // This function will compute the entire optimization (cluster mesh, vcache per cluster, and optimize overdraw). // It will use TOOTLE_OVERDRAW_FAST as the default overdraw optimization result = TootleOptimize(pfVB, pnIB, nVertices, nFaces, nStride, settings.nCacheSize, pViewpoints, nViewpoints, settings.eWinding, pnIB, &nNumClusters, settings.eVCacheOptimizer); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fTootleOptimizeTime = timer.GetElapsed(); stats.nClusters = nNumClusters; break; case TOOTLE_FAST_OPTIMIZE: // ******************************************************************************************************************* // An example of using a single utility function to perform v2.0 optimizations. // ******************************************************************************************************************* // This function will compute the entire optimization (optimize vertex cache, cluster mesh, and optimize overdraw). // It will use TOOTLE_OVERDRAW_FAST as the default overdraw optimization result = TootleFastOptimize(pfVB, pnIB, nVertices, nFaces, nStride, settings.nCacheSize, settings.eWinding, pnIB, &nNumClusters, TOOTLE_DEFAULT_ALPHA); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fTootleFastOptimizeTime = timer.GetElapsed(); stats.nClusters = nNumClusters; break; default: // wrong algorithm choice break; } // measure output VCache efficiency result = TootleMeasureCacheEfficiency(pnIB, nFaces, settings.nCacheSize, &stats.fVCacheOut); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } if (settings.bMeasureOverdraw) { // measure output overdraw timer.Reset(); result = TootleMeasureOverdraw(pfVB, pnIB, nVertices, nFaces, nStride, pViewpoints, nViewpoints, settings.eWinding, &stats.fOverdrawOut, &stats.fMaxOverdrawOut); stats.fMeasureOverdrawTime = timer.GetElapsed(); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } } //----------------------------------------------------------------------------------------------------- // PERFORM VERTEX MEMORY OPTIMIZATION (rearrange memory layout for vertices based on the final indices // to exploit vertex cache prefetch). // We want to optimize the vertex memory locations based on the final optimized index buffer that will // be in the output file. // Thus, in this sample code, we recompute a copy of the indices that point to the original vertices // (pnIBTmp) to be passed into the function TootleOptimizeVertexMemory. If we use the array pnIB, we // will optimize for the wrong result since the array pnIB is based on the rehashed vertex location created // by the function ObjLoader. //----------------------------------------------------------------------------------------------------- timer.Reset(); std::vector<unsigned int> pnVertexRemapping; unsigned int nReferencedVertices = 0; // The actual total number of vertices referenced by the indices if (settings.bOptimizeVertexMemory) { std::vector<unsigned int> pnIBTmp; pnIBTmp.resize(nFaces * 3); // compute the indices to be optimized for (the original pointed by the obj file). for (unsigned int i = 0; i < indices.size(); i += 3) { for (int j = 0; j < 3; j++) { const ObjVertexFinal& rVertex = objVertices[ pnIB[ i + j ] ]; pnIBTmp[ i + j ] = rVertex.nVertexIndex - 1; // index is off by 1 // compute the max vertices if (rVertex.nVertexIndex > nReferencedVertices) { nReferencedVertices = rVertex.nVertexIndex; } } } pnVertexRemapping.resize(nReferencedVertices); // For this sample code, we are just going to use vertexRemapping array result. This is to support general obj // file input and output. // In fact, we are sending the wrong vertex buffer here (it should be based on the original file instead of the // rehashed vertices). But, it is ok because we do not request the reordered vertex buffer as an output. result = TootleOptimizeVertexMemory(pfVB, &pnIBTmp[0], nReferencedVertices, nFaces, nStride, NULL, NULL, &pnVertexRemapping[0]); if (result != TOOTLE_OK) { DisplayTootleErrorMessage(result); return 1; } stats.fOptimizeVertexMemoryTime = timer.GetElapsed(); } // clean up tootle TootleCleanup(); // print tootle statistics to stdout and stderr // display the current test case PrintAlgorithm(stderr, settings.eVCacheOptimizer, settings.algorithmChoice, settings.nCacheSize, stats.nClusters); PrintAlgorithm(stdout, settings.eVCacheOptimizer, settings.algorithmChoice, settings.nCacheSize, stats.nClusters); PrintStats(stdout, &stats); PrintStats(stderr, &stats); // emit a modified .OBJ file std::ifstream inputStream(settings.pMeshName); bool bResult; if (settings.bOptimizeVertexMemory) { bResult = EmitModifiedObj(inputStream, std::cout, objVertices, indices, &pnVertexRemapping[0], nReferencedVertices); } else { bResult = EmitModifiedObj(inputStream, std::cout, objVertices, indices, NULL, 0); } if (bResult) { return 1; } return 0; }