/////////////////////////////////////////////////////////////////////////////////////////// // SplitUpStripsAndOptimize() // // Splits the input vector of strips (allBigStrips) into smaller, cache friendly pieces, then // reorders these pieces to maximize cache hits // The final strips are output through outStrips // void NvStripifier::SplitUpStripsAndOptimize(NvStripInfoVec &allStrips, NvStripInfoVec &outStrips, NvEdgeInfoVec& edgeInfos, NvFaceInfoVec& outFaceList) { int threshold = cacheSize; NvStripInfoVec tempStrips; //split up strips into threshold-sized pieces for(size_t i = 0; i < allStrips.size(); i++) { NvStripInfo* currentStrip; NvStripStartInfo startInfo(NULL, NULL, false); int actualStripSize = 0; for(size_t j = 0; j < allStrips[i]->m_faces.size(); ++j) { if( !IsDegenerate(allStrips[i]->m_faces[j]) ) actualStripSize++; } if(actualStripSize /*allStrips[i]->m_faces.size()*/ > threshold) { int numTimes = actualStripSize /*allStrips[i]->m_faces.size()*/ / threshold; int numLeftover = actualStripSize /*allStrips[i]->m_faces.size()*/ % threshold; int degenerateCount = 0; int j = 0; for(; j < numTimes; j++) { currentStrip = new NvStripInfo(startInfo, 0, -1); int faceCtr = j*threshold + degenerateCount; bool bFirstTime = true; while(faceCtr < threshold+(j*threshold)+degenerateCount) { if(IsDegenerate(allStrips[i]->m_faces[faceCtr])) { degenerateCount++; //last time or first time through, no need for a degenerate if( (((faceCtr + 1) != threshold+(j*threshold)+degenerateCount) || ((j == numTimes - 1) && (numLeftover < 4) && (numLeftover > 0))) && !bFirstTime) { currentStrip->m_faces.push_back(allStrips[i]->m_faces[faceCtr++]); } else { //but, we do need to delete the degenerate, if it's marked fake, to avoid leaking if(allStrips[i]->m_faces[faceCtr]->m_bIsFake) { delete allStrips[i]->m_faces[faceCtr], allStrips[i]->m_faces[faceCtr] = NULL; } ++faceCtr; } } else { currentStrip->m_faces.push_back(allStrips[i]->m_faces[faceCtr++]); bFirstTime = false; } } /* for(int faceCtr = j*threshold; faceCtr < threshold+(j*threshold); faceCtr++) { currentStrip->m_faces.push_back(allStrips[i]->m_faces[faceCtr]); } */ if(j == numTimes - 1) //last time through { if( (numLeftover < 4) && (numLeftover > 0) ) //way too small { //just add to last strip int ctr = 0; while(ctr < numLeftover) { IsDegenerate( allStrips[i]->m_faces[faceCtr] ) ? ++degenerateCount : ++ctr; currentStrip->m_faces.push_back(allStrips[i]->m_faces[faceCtr++]); } numLeftover = 0; } } tempStrips.push_back(currentStrip); } int leftOff = j * threshold + degenerateCount; if(numLeftover != 0) { currentStrip = new NvStripInfo(startInfo, 0, -1); int ctr = 0; bool bFirstTime = true; while(ctr < numLeftover) { if( !IsDegenerate(allStrips[i]->m_faces[leftOff]) ) { ctr++; bFirstTime = false; currentStrip->m_faces.push_back(allStrips[i]->m_faces[leftOff++]); } else if(!bFirstTime) currentStrip->m_faces.push_back(allStrips[i]->m_faces[leftOff++]); else { //don't leak if(allStrips[i]->m_faces[leftOff]->m_bIsFake) { delete allStrips[i]->m_faces[leftOff], allStrips[i]->m_faces[leftOff] = NULL; } leftOff++; } } /* for(size_t k = 0; k < numLeftover; k++) { currentStrip->m_faces.push_back(allStrips[i]->m_faces[leftOff++]); } */ tempStrips.push_back(currentStrip); } } else { //we're not just doing a tempStrips.push_back(allBigStrips[i]) because // this way we can delete allBigStrips later to free the memory currentStrip = new NvStripInfo(startInfo, 0, -1); for(size_t j = 0; j < allStrips[i]->m_faces.size(); j++) currentStrip->m_faces.push_back(allStrips[i]->m_faces[j]); tempStrips.push_back(currentStrip); } } //add small strips to face list NvStripInfoVec tempStrips2; RemoveSmallStrips(tempStrips, tempStrips2, outFaceList); outStrips.clear(); //screw optimization for now // for(size_t i = 0; i < tempStrips.size(); ++i) // outStrips.push_back(tempStrips[i]); if(tempStrips2.size() != 0) { //Optimize for the vertex cache VertexCache* vcache = new VertexCache(cacheSize); float bestNumHits = -1.0f; float numHits; int bestIndex; bool done = false; int firstIndex = 0; float minCost = 10000.0f; for(size_t i = 0; i < tempStrips2.size(); i++) { int numNeighbors = 0; //find strip with least number of neighbors per face for(size_t j = 0; j < tempStrips2[i]->m_faces.size(); j++) { numNeighbors += NumNeighbors(tempStrips2[i]->m_faces[j], edgeInfos); } float currCost = (float)numNeighbors / (float)tempStrips2[i]->m_faces.size(); if(currCost < minCost) { minCost = currCost; firstIndex = i; } } UpdateCacheStrip(vcache, tempStrips2[firstIndex]); outStrips.push_back(tempStrips2[firstIndex]); tempStrips2[firstIndex]->visited = true; bool bWantsCW = (tempStrips2[firstIndex]->m_faces.size() % 2) == 0; //this n^2 algo is what slows down stripification so much.... // needs to be improved while(1) { bestNumHits = -1.0f; //find best strip to add next, given the current cache for(size_t i = 0; i < tempStrips2.size(); i++) { if(tempStrips2[i]->visited) continue; numHits = CalcNumHitsStrip(vcache, tempStrips2[i]); if(numHits > bestNumHits) { bestNumHits = numHits; bestIndex = i; } else if(numHits >= bestNumHits) { //check previous strip to see if this one requires it to switch polarity NvStripInfo *strip = tempStrips2[i]; int nStripFaceCount = strip->m_faces.size(); NvFaceInfo tFirstFace(strip->m_faces[0]->m_v0, strip->m_faces[0]->m_v1, strip->m_faces[0]->m_v2); // If there is a second face, reorder vertices such that the // unique vertex is first if (nStripFaceCount > 1) { int nUnique = NvStripifier::GetUniqueVertexInB(strip->m_faces[1], &tFirstFace); if (nUnique == tFirstFace.m_v1) { SWAP(tFirstFace.m_v0, tFirstFace.m_v1); } else if (nUnique == tFirstFace.m_v2) { SWAP(tFirstFace.m_v0, tFirstFace.m_v2); } // If there is a third face, reorder vertices such that the // shared vertex is last if (nStripFaceCount > 2) { int nShared0, nShared1; GetSharedVertices(strip->m_faces[2], &tFirstFace, &nShared0, &nShared1); if ( (nShared0 == tFirstFace.m_v1) && (nShared1 == -1) ) { SWAP(tFirstFace.m_v1, tFirstFace.m_v2); } } } // Check CW/CCW ordering if (bWantsCW == IsCW(strip->m_faces[0], tFirstFace.m_v0, tFirstFace.m_v1)) { //I like this one! bestIndex = i; } } } if(bestNumHits == -1.0f) break; tempStrips2[bestIndex]->visited = true; UpdateCacheStrip(vcache, tempStrips2[bestIndex]); outStrips.push_back(tempStrips2[bestIndex]); bWantsCW = (tempStrips2[bestIndex]->m_faces.size() % 2 == 0) ? bWantsCW : !bWantsCW; } delete vcache; } }
/////////////////////////////////////////////////////////////////////////////////////////// // SplitUpStripsAndOptimize() // // Splits the input _vector_ of strips (allBigStrips) into smaller, cache friendly pieces, then // reorders these pieces to maximize cache hits // The final strips are output through outStrips // void NvStripifier::SplitUpStripsAndOptimize(NvStripInfoVec &allStrips, NvStripInfoVec &outStrips, NvEdgeInfoVec& edgeInfos, NvFaceInfoVec& outFaceList) { int threshold = cacheSize; NvStripInfoVec tempStrips; //split up strips into threshold-sized pieces int i; for(i = 0; i < allStrips.size(); i++) { NvStripInfo* currentStrip; NvStripStartInfo startInfo(NULL, NULL, false); if(allStrips[i]->m_faces.size() > threshold) { int numTimes = allStrips[i]->m_faces.size() / threshold; int numLeftover = allStrips[i]->m_faces.size() % threshold; int j; for(j = 0; j < numTimes; j++) { currentStrip = xr_new<NvStripInfo> (startInfo, 0, -1); for(int faceCtr = j*threshold; faceCtr < threshold+(j*threshold); faceCtr++) { currentStrip->m_faces.push_back(allStrips[i]->m_faces[faceCtr]); } tempStrips.push_back(currentStrip); } int leftOff = j * threshold; if(numLeftover != 0) { currentStrip = xr_new<NvStripInfo> (startInfo, 0, -1); for(int k = 0; k < numLeftover; k++) { currentStrip->m_faces.push_back(allStrips[i]->m_faces[leftOff++]); } tempStrips.push_back(currentStrip); } } else { //we're not just doing a tempStrips.push_back(allBigStrips[i]) because // this way we can _delete allBigStrips later to xr_free the memory currentStrip = xr_new<NvStripInfo> (startInfo, 0, -1); for(int j = 0; j < allStrips[i]->m_faces.size(); j++) currentStrip->m_faces.push_back(allStrips[i]->m_faces[j]); tempStrips.push_back(currentStrip); } } //add small strips to face list NvStripInfoVec tempStrips2; RemoveSmallStrips(tempStrips, tempStrips2, outFaceList); outStrips.clear(); if(tempStrips2.size() != 0) { //Optimize for the vertex cache VertexCache* vcache = xr_new<VertexCache> (cacheSize); float bestNumHits = -1.0f; float numHits = 0; int bestIndex = 0; int firstIndex = 0; float minCost = 10000.0f; for(i = 0; i < tempStrips2.size(); i++) { int numNeighbors = 0; //find strip with least number of neighbors per face for(int j = 0; j < tempStrips2[i]->m_faces.size(); j++) { numNeighbors += NumNeighbors(tempStrips2[i]->m_faces[j], edgeInfos); } float currCost = (float)numNeighbors / (float)tempStrips2[i]->m_faces.size(); if(currCost < minCost) { minCost = currCost; firstIndex = i; } } UpdateCacheStrip(vcache, tempStrips2[firstIndex]); outStrips.push_back(tempStrips2[firstIndex]); tempStrips2[firstIndex]->visited = true; //this n^2 algo is what slows down stripification so much.... // needs to be improved while(1) { bestNumHits = -1.0f; //find best strip to add next, given the current cache for(int i = 0; i < tempStrips2.size(); i++) { if(tempStrips2[i]->visited) continue; numHits = CalcNumHitsStrip(vcache, tempStrips2[i]); if(numHits > bestNumHits) { bestNumHits = numHits; bestIndex = i; } } if(bestNumHits == -1.0f) break; tempStrips2[bestIndex]->visited = true; UpdateCacheStrip(vcache, tempStrips2[bestIndex]); outStrips.push_back(tempStrips2[bestIndex]); } xr_delete(vcache); } }