///////////////////////////////////////////////////////////////////////////////////////////
// SplitUpStripsAndOptimize()
//
// Splits the input vector of strips (allBigStrips) into smaller, cache friendly pieces, then
//  reorders these pieces to maximize cache hits
// The final strips are output through outStrips
//
void NvStripifier::SplitUpStripsAndOptimize(NvStripInfoVec &allStrips, NvStripInfoVec &outStrips,
                                            NvEdgeInfoVec& edgeInfos, NvFaceInfoVec& outFaceList)
{
	int threshold = cacheSize;
	NvStripInfoVec tempStrips;
	
	//split up strips into threshold-sized pieces
	for(size_t i = 0; i < allStrips.size(); i++)
	{
		NvStripInfo* currentStrip;
		NvStripStartInfo startInfo(NULL, NULL, false);
	
		int actualStripSize = 0;
		for(size_t j = 0; j < allStrips[i]->m_faces.size(); ++j)
		{
			if( !IsDegenerate(allStrips[i]->m_faces[j]) )
				actualStripSize++;
		}
		
		if(actualStripSize /*allStrips[i]->m_faces.size()*/ > threshold)
		{
			
			int numTimes    = actualStripSize /*allStrips[i]->m_faces.size()*/ / threshold;
			int numLeftover = actualStripSize /*allStrips[i]->m_faces.size()*/ % threshold;

			int degenerateCount = 0;
            int j = 0;
			for(; j < numTimes; j++)
			{
				currentStrip = new NvStripInfo(startInfo, 0, -1);
				
				int faceCtr = j*threshold + degenerateCount;
				bool bFirstTime = true;
				while(faceCtr < threshold+(j*threshold)+degenerateCount)
				{
					if(IsDegenerate(allStrips[i]->m_faces[faceCtr]))
					{
						degenerateCount++;
						
						//last time or first time through, no need for a degenerate
						if( (((faceCtr + 1) != threshold+(j*threshold)+degenerateCount) ||
							 ((j == numTimes - 1) && (numLeftover < 4) && (numLeftover > 0))) && 
							 !bFirstTime)
						{
							currentStrip->m_faces.push_back(allStrips[i]->m_faces[faceCtr++]);
						}
						else
						{
							//but, we do need to delete the degenerate, if it's marked fake, to avoid leaking
							if(allStrips[i]->m_faces[faceCtr]->m_bIsFake)
							{
								delete allStrips[i]->m_faces[faceCtr], allStrips[i]->m_faces[faceCtr] = NULL;
							}
							++faceCtr;
						}
					}
					else
					{
						currentStrip->m_faces.push_back(allStrips[i]->m_faces[faceCtr++]);
						bFirstTime = false;
					}
				}
				/*
				for(int faceCtr = j*threshold; faceCtr < threshold+(j*threshold); faceCtr++)
				{
					currentStrip->m_faces.push_back(allStrips[i]->m_faces[faceCtr]);
				}
				*/
				if(j == numTimes - 1) //last time through
				{
					if( (numLeftover < 4) && (numLeftover > 0) ) //way too small
					{
						//just add to last strip
						int ctr = 0;
						while(ctr < numLeftover)
						{
							IsDegenerate( allStrips[i]->m_faces[faceCtr] ) ? ++degenerateCount : ++ctr;
							currentStrip->m_faces.push_back(allStrips[i]->m_faces[faceCtr++]);
						}
						numLeftover = 0;
					}
				}
				tempStrips.push_back(currentStrip);
			}
			
			int leftOff = j * threshold + degenerateCount;
			
			if(numLeftover != 0)
			{
				currentStrip = new NvStripInfo(startInfo, 0, -1);   
				
				int ctr = 0;
				bool bFirstTime = true;
				while(ctr < numLeftover)
				{
					if( !IsDegenerate(allStrips[i]->m_faces[leftOff]) )
					{
						ctr++;
						bFirstTime = false;
						currentStrip->m_faces.push_back(allStrips[i]->m_faces[leftOff++]);
					}
					else if(!bFirstTime)
						currentStrip->m_faces.push_back(allStrips[i]->m_faces[leftOff++]);
					else
					{
						//don't leak
						if(allStrips[i]->m_faces[leftOff]->m_bIsFake)
						{
							delete allStrips[i]->m_faces[leftOff], allStrips[i]->m_faces[leftOff] = NULL;
						}

						leftOff++;
					}
				}
				/*
				for(size_t k = 0; k < numLeftover; k++)
				{
					currentStrip->m_faces.push_back(allStrips[i]->m_faces[leftOff++]);
				}
				*/
				
				tempStrips.push_back(currentStrip);
			}
		}
		else
		{
			//we're not just doing a tempStrips.push_back(allBigStrips[i]) because
			// this way we can delete allBigStrips later to free the memory
			currentStrip = new NvStripInfo(startInfo, 0, -1);
			
			for(size_t j = 0; j < allStrips[i]->m_faces.size(); j++)
				currentStrip->m_faces.push_back(allStrips[i]->m_faces[j]);
			
			tempStrips.push_back(currentStrip);
		}
	}

	//add small strips to face list
	NvStripInfoVec tempStrips2;
	RemoveSmallStrips(tempStrips, tempStrips2, outFaceList);
	
	outStrips.clear();
	//screw optimization for now
//	for(size_t i = 0; i < tempStrips.size(); ++i)
//    outStrips.push_back(tempStrips[i]);
	
	if(tempStrips2.size() != 0)
	{
		//Optimize for the vertex cache
		VertexCache* vcache = new VertexCache(cacheSize);
		
		float bestNumHits = -1.0f;
		float numHits;
		int bestIndex;
		bool done = false;
		
		int firstIndex = 0;
		float minCost = 10000.0f;
		
		for(size_t i = 0; i < tempStrips2.size(); i++)
		{
			int numNeighbors = 0;
			
			//find strip with least number of neighbors per face
			for(size_t j = 0; j < tempStrips2[i]->m_faces.size(); j++)
			{
				numNeighbors += NumNeighbors(tempStrips2[i]->m_faces[j], edgeInfos);
			}
			
			float currCost = (float)numNeighbors / (float)tempStrips2[i]->m_faces.size();
			if(currCost < minCost)
			{
				minCost = currCost;
				firstIndex = i;
			}
		}
		
		UpdateCacheStrip(vcache, tempStrips2[firstIndex]);
		outStrips.push_back(tempStrips2[firstIndex]);
		
		tempStrips2[firstIndex]->visited = true;
		
		bool bWantsCW = (tempStrips2[firstIndex]->m_faces.size() % 2) == 0;

		//this n^2 algo is what slows down stripification so much....
		// needs to be improved
		while(1)
		{
			bestNumHits = -1.0f;
			
			//find best strip to add next, given the current cache
			for(size_t i = 0; i < tempStrips2.size(); i++)
			{
				if(tempStrips2[i]->visited)
					continue;

				numHits = CalcNumHitsStrip(vcache, tempStrips2[i]);
				if(numHits > bestNumHits)
				{
					bestNumHits = numHits;
					bestIndex = i;
				}
				else if(numHits >= bestNumHits)
				{
					//check previous strip to see if this one requires it to switch polarity
					NvStripInfo *strip = tempStrips2[i];
					int nStripFaceCount = strip->m_faces.size();
					
					NvFaceInfo tFirstFace(strip->m_faces[0]->m_v0, strip->m_faces[0]->m_v1, strip->m_faces[0]->m_v2);
					
					// If there is a second face, reorder vertices such that the
					// unique vertex is first
					if (nStripFaceCount > 1)
					{
						int nUnique = NvStripifier::GetUniqueVertexInB(strip->m_faces[1], &tFirstFace);
						if (nUnique == tFirstFace.m_v1)
						{
							SWAP(tFirstFace.m_v0, tFirstFace.m_v1);
						}
						else if (nUnique == tFirstFace.m_v2)
						{
							SWAP(tFirstFace.m_v0, tFirstFace.m_v2);
						}
						
						// If there is a third face, reorder vertices such that the
						// shared vertex is last
						if (nStripFaceCount > 2)
						{
							int nShared0, nShared1;
							GetSharedVertices(strip->m_faces[2], &tFirstFace, &nShared0, &nShared1);
							if ( (nShared0 == tFirstFace.m_v1) && (nShared1 == -1) )
							{
								SWAP(tFirstFace.m_v1, tFirstFace.m_v2);
							}
						}
					}
						
					// Check CW/CCW ordering
					if (bWantsCW == IsCW(strip->m_faces[0], tFirstFace.m_v0, tFirstFace.m_v1))
					{
						//I like this one!
						bestIndex = i;
					}
				}
			}
			
			if(bestNumHits == -1.0f)
				break;
			tempStrips2[bestIndex]->visited = true;
			UpdateCacheStrip(vcache, tempStrips2[bestIndex]);
			outStrips.push_back(tempStrips2[bestIndex]);
			bWantsCW = (tempStrips2[bestIndex]->m_faces.size() % 2 == 0) ? bWantsCW : !bWantsCW;
		}
		
		delete vcache;	
	}
}
예제 #2
0
///////////////////////////////////////////////////////////////////////////////////////////
// SplitUpStripsAndOptimize()
//
// Splits the input _vector_ of strips (allBigStrips) into smaller, cache friendly pieces, then
//  reorders these pieces to maximize cache hits
// The final strips are output through outStrips
//
void NvStripifier::SplitUpStripsAndOptimize(NvStripInfoVec &allStrips, NvStripInfoVec &outStrips,
                                            NvEdgeInfoVec& edgeInfos, NvFaceInfoVec& outFaceList)
{
	int threshold = cacheSize;
	NvStripInfoVec tempStrips;
	
	//split up strips into threshold-sized pieces
	int i;
	for(i = 0; i < allStrips.size(); i++)
	{
		NvStripInfo* currentStrip;
		NvStripStartInfo startInfo(NULL, NULL, false);
		
		if(allStrips[i]->m_faces.size() > threshold)
		{
			
			int numTimes    = allStrips[i]->m_faces.size() / threshold;
			int numLeftover = allStrips[i]->m_faces.size() % threshold;
			
			int j;
			for(j = 0; j < numTimes; j++)
			{
				currentStrip = xr_new<NvStripInfo> (startInfo, 0, -1);
				
				for(int faceCtr = j*threshold; faceCtr < threshold+(j*threshold); faceCtr++) {
					currentStrip->m_faces.push_back(allStrips[i]->m_faces[faceCtr]);
				}
				
				tempStrips.push_back(currentStrip);
			}
			
			int leftOff = j * threshold;
			
			if(numLeftover != 0)
			{
				currentStrip = xr_new<NvStripInfo> (startInfo, 0, -1);   
				
				for(int k = 0; k < numLeftover; k++)
				{
					currentStrip->m_faces.push_back(allStrips[i]->m_faces[leftOff++]);
				}
				
				tempStrips.push_back(currentStrip);
			}
		}
		else
		{
			//we're not just doing a tempStrips.push_back(allBigStrips[i]) because
			// this way we can _delete allBigStrips later to xr_free the memory
			currentStrip = xr_new<NvStripInfo> (startInfo, 0, -1);
			
			for(int j = 0; j < allStrips[i]->m_faces.size(); j++)
				currentStrip->m_faces.push_back(allStrips[i]->m_faces[j]);
			
			tempStrips.push_back(currentStrip);
		}
	}

	//add small strips to face list
	NvStripInfoVec tempStrips2;
	RemoveSmallStrips(tempStrips, tempStrips2, outFaceList);
	
	outStrips.clear();
	if(tempStrips2.size() != 0)
	{
		//Optimize for the vertex cache
		VertexCache* vcache = xr_new<VertexCache> (cacheSize);
		
		float bestNumHits	= -1.0f;
		float numHits		= 0;
		int bestIndex		= 0;
		
		int firstIndex = 0;
		float minCost = 10000.0f;
		
		for(i = 0; i < tempStrips2.size(); i++)
		{
			int numNeighbors = 0;
			
			//find strip with least number of neighbors per face
			for(int j = 0; j < tempStrips2[i]->m_faces.size(); j++)
			{
				numNeighbors += NumNeighbors(tempStrips2[i]->m_faces[j], edgeInfos);
			}
			
			float currCost = (float)numNeighbors / (float)tempStrips2[i]->m_faces.size();
			if(currCost < minCost)
			{
				minCost = currCost;
				firstIndex = i;
			}
		}
		
		UpdateCacheStrip(vcache, tempStrips2[firstIndex]);
		outStrips.push_back(tempStrips2[firstIndex]);
		
		tempStrips2[firstIndex]->visited = true;
		
		//this n^2 algo is what slows down stripification so much....
		// needs to be improved
		while(1)
		{
			bestNumHits = -1.0f;
			
			//find best strip to add next, given the current cache
			for(int i = 0; i < tempStrips2.size(); i++)
			{
				if(tempStrips2[i]->visited)
					continue;
				
				numHits = CalcNumHitsStrip(vcache, tempStrips2[i]);
				if(numHits > bestNumHits)
				{
					bestNumHits = numHits;
					bestIndex = i;
				}
			}
			
			if(bestNumHits == -1.0f)
				break;
			tempStrips2[bestIndex]->visited = true;
			UpdateCacheStrip(vcache, tempStrips2[bestIndex]);
			outStrips.push_back(tempStrips2[bestIndex]);
		}
		
		xr_delete(vcache);	
	}
}