// Adds this vertex index and returns the highest-scoring triangle index u32 add(u16 vert, bool updatetris = false) { bool found = false; // Mark existing pos as empty for (u16 i = 0; i < cachesize; i++) { if (cache[i] == vert) { // Move everything down for (u16 j = i; j; j--) { cache[j] = cache[j - 1]; } found = true; break; } } if (!found) { if (cache[cachesize-1] != -1) vc[cache[cachesize-1]].cachepos = -1; // Move everything down for (u16 i = cachesize - 1; i; i--) { cache[i] = cache[i - 1]; } } cache[0] = vert; u32 highest = 0; float hiscore = 0; if (updatetris) { // Update cache positions for (u16 i = 0; i < cachesize; i++) { if (cache[i] == -1) break; vc[cache[i]].cachepos = i; vc[cache[i]].score = FindVertexScore(&vc[cache[i]]); } // Update triangle scores for (u16 i = 0; i < cachesize; i++) { if (cache[i] == -1) break; const u16 trisize = vc[cache[i]].tris.size(); for (u16 t = 0; t < trisize; t++) { tcache *tri = &tc[vc[cache[i]].tris[t]]; tri->score = vc[tri->ind[0]].score + vc[tri->ind[1]].score + vc[tri->ind[2]].score; if (tri->score > hiscore) { hiscore = tri->score; highest = vc[cache[i]].tris[t]; } } } } return highest; }
/** Vertex cache optimization according to the Forsyth paper: http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html The function is thread-safe (read: you can optimize several meshes in different threads) \param mesh Source mesh for the operation. */ scene::IMesh* createForsythOptimizedMesh(const scene::IMesh *mesh) { if (!mesh) return 0; scene::SMesh *newmesh = new scene::SMesh(); newmesh->BoundingBox = mesh->getBoundingBox(); const u32 mbcount = mesh->getMeshBufferCount(); for (u32 b = 0; b < mbcount; ++b) { const scene::IMeshBuffer *mb = mesh->getMeshBuffer(b); if (mb->getIndexType() != video::EIT_16BIT) { //os::Printer::log("Cannot optimize a mesh with 32bit indices", ELL_ERROR); newmesh->drop(); return 0; } const u32 icount = mb->getIndexCount(); const u32 tcount = icount / 3; const u32 vcount = mb->getVertexCount(); const u16 *ind = mb->getIndices(); vcache *vc = new vcache[vcount]; tcache *tc = new tcache[tcount]; f_lru lru(vc, tc); // init for (u16 i = 0; i < vcount; i++) { vc[i].score = 0; vc[i].cachepos = -1; vc[i].NumActiveTris = 0; } // First pass: count how many times a vert is used for (u32 i = 0; i < icount; i += 3) { vc[ind[i]].NumActiveTris++; vc[ind[i + 1]].NumActiveTris++; vc[ind[i + 2]].NumActiveTris++; const u32 tri_ind = i/3; tc[tri_ind].ind[0] = ind[i]; tc[tri_ind].ind[1] = ind[i + 1]; tc[tri_ind].ind[2] = ind[i + 2]; } // Second pass: list of each triangle for (u32 i = 0; i < tcount; i++) { vc[tc[i].ind[0]].tris.push_back(i); vc[tc[i].ind[1]].tris.push_back(i); vc[tc[i].ind[2]].tris.push_back(i); tc[i].drawn = false; } // Give initial scores for (u16 i = 0; i < vcount; i++) { vc[i].score = FindVertexScore(&vc[i]); } for (u32 i = 0; i < tcount; i++) { tc[i].score = vc[tc[i].ind[0]].score + vc[tc[i].ind[1]].score + vc[tc[i].ind[2]].score; } switch(mb->getVertexType()) { case video::EVT_STANDARD: { video::S3DVertex *v = (video::S3DVertex *) mb->getVertices(); scene::SMeshBuffer *buf = new scene::SMeshBuffer(); buf->Material = mb->getMaterial(); buf->Vertices.reallocate(vcount); buf->Indices.reallocate(icount); core::map<const video::S3DVertex, const u16> sind; // search index for fast operation typedef core::map<const video::S3DVertex, const u16>::Node snode; // Main algorithm u32 highest = 0; u32 drawcalls = 0; for (;;) { if (tc[highest].drawn) { bool found = false; float hiscore = 0; for (u32 t = 0; t < tcount; t++) { if (!tc[t].drawn) { if (tc[t].score > hiscore) { highest = t; hiscore = tc[t].score; found = true; } } } if (!found) break; } // Output the best triangle u16 newind = buf->Vertices.size(); snode *s = sind.find(v[tc[highest].ind[0]]); if (!s) { buf->Vertices.push_back(v[tc[highest].ind[0]]); buf->Indices.push_back(newind); sind.insert(v[tc[highest].ind[0]], newind); newind++; } else { buf->Indices.push_back(s->getValue()); } s = sind.find(v[tc[highest].ind[1]]); if (!s) { buf->Vertices.push_back(v[tc[highest].ind[1]]); buf->Indices.push_back(newind); sind.insert(v[tc[highest].ind[1]], newind); newind++; } else { buf->Indices.push_back(s->getValue()); } s = sind.find(v[tc[highest].ind[2]]); if (!s) { buf->Vertices.push_back(v[tc[highest].ind[2]]); buf->Indices.push_back(newind); sind.insert(v[tc[highest].ind[2]], newind); } else { buf->Indices.push_back(s->getValue()); } vc[tc[highest].ind[0]].NumActiveTris--; vc[tc[highest].ind[1]].NumActiveTris--; vc[tc[highest].ind[2]].NumActiveTris--; tc[highest].drawn = true; for (u16 j = 0; j < 3; j++) { vcache *vert = &vc[tc[highest].ind[j]]; for (u16 t = 0; t < vert->tris.size(); t++) { if (highest == vert->tris[t]) { vert->tris.erase(t); break; } } } lru.add(tc[highest].ind[0]); lru.add(tc[highest].ind[1]); highest = lru.add(tc[highest].ind[2], true); drawcalls++; } buf->setBoundingBox(mb->getBoundingBox()); newmesh->addMeshBuffer(buf); buf->drop(); } break; case video::EVT_2TCOORDS: { video::S3DVertex2TCoords *v = (video::S3DVertex2TCoords *) mb->getVertices(); scene::SMeshBufferLightMap *buf = new scene::SMeshBufferLightMap(); buf->Material = mb->getMaterial(); buf->Vertices.reallocate(vcount); buf->Indices.reallocate(icount); core::map<const video::S3DVertex2TCoords, const u16> sind; // search index for fast operation typedef core::map<const video::S3DVertex2TCoords, const u16>::Node snode; // Main algorithm u32 highest = 0; u32 drawcalls = 0; for (;;) { if (tc[highest].drawn) { bool found = false; float hiscore = 0; for (u32 t = 0; t < tcount; t++) { if (!tc[t].drawn) { if (tc[t].score > hiscore) { highest = t; hiscore = tc[t].score; found = true; } } } if (!found) break; } // Output the best triangle u16 newind = buf->Vertices.size(); snode *s = sind.find(v[tc[highest].ind[0]]); if (!s) { buf->Vertices.push_back(v[tc[highest].ind[0]]); buf->Indices.push_back(newind); sind.insert(v[tc[highest].ind[0]], newind); newind++; } else { buf->Indices.push_back(s->getValue()); } s = sind.find(v[tc[highest].ind[1]]); if (!s) { buf->Vertices.push_back(v[tc[highest].ind[1]]); buf->Indices.push_back(newind); sind.insert(v[tc[highest].ind[1]], newind); newind++; } else { buf->Indices.push_back(s->getValue()); } s = sind.find(v[tc[highest].ind[2]]); if (!s) { buf->Vertices.push_back(v[tc[highest].ind[2]]); buf->Indices.push_back(newind); sind.insert(v[tc[highest].ind[2]], newind); } else { buf->Indices.push_back(s->getValue()); } vc[tc[highest].ind[0]].NumActiveTris--; vc[tc[highest].ind[1]].NumActiveTris--; vc[tc[highest].ind[2]].NumActiveTris--; tc[highest].drawn = true; for (u16 j = 0; j < 3; j++) { vcache *vert = &vc[tc[highest].ind[j]]; for (u16 t = 0; t < vert->tris.size(); t++) { if (highest == vert->tris[t]) { vert->tris.erase(t); break; } } } lru.add(tc[highest].ind[0]); lru.add(tc[highest].ind[1]); highest = lru.add(tc[highest].ind[2]); drawcalls++; } buf->setBoundingBox(mb->getBoundingBox()); newmesh->addMeshBuffer(buf); buf->drop(); } break; case video::EVT_TANGENTS: { video::S3DVertexTangents *v = (video::S3DVertexTangents *) mb->getVertices(); scene::SMeshBufferTangents *buf = new scene::SMeshBufferTangents(); buf->Material = mb->getMaterial(); buf->Vertices.reallocate(vcount); buf->Indices.reallocate(icount); core::map<const video::S3DVertexTangents, const u16> sind; // search index for fast operation typedef core::map<const video::S3DVertexTangents, const u16>::Node snode; // Main algorithm u32 highest = 0; u32 drawcalls = 0; for (;;) { if (tc[highest].drawn) { bool found = false; float hiscore = 0; for (u32 t = 0; t < tcount; t++) { if (!tc[t].drawn) { if (tc[t].score > hiscore) { highest = t; hiscore = tc[t].score; found = true; } } } if (!found) break; } // Output the best triangle u16 newind = buf->Vertices.size(); snode *s = sind.find(v[tc[highest].ind[0]]); if (!s) { buf->Vertices.push_back(v[tc[highest].ind[0]]); buf->Indices.push_back(newind); sind.insert(v[tc[highest].ind[0]], newind); newind++; } else { buf->Indices.push_back(s->getValue()); } s = sind.find(v[tc[highest].ind[1]]); if (!s) { buf->Vertices.push_back(v[tc[highest].ind[1]]); buf->Indices.push_back(newind); sind.insert(v[tc[highest].ind[1]], newind); newind++; } else { buf->Indices.push_back(s->getValue()); } s = sind.find(v[tc[highest].ind[2]]); if (!s) { buf->Vertices.push_back(v[tc[highest].ind[2]]); buf->Indices.push_back(newind); sind.insert(v[tc[highest].ind[2]], newind); } else { buf->Indices.push_back(s->getValue()); } vc[tc[highest].ind[0]].NumActiveTris--; vc[tc[highest].ind[1]].NumActiveTris--; vc[tc[highest].ind[2]].NumActiveTris--; tc[highest].drawn = true; for (u16 j = 0; j < 3; j++) { vcache *vert = &vc[tc[highest].ind[j]]; for (u16 t = 0; t < vert->tris.size(); t++) { if (highest == vert->tris[t]) { vert->tris.erase(t); break; } } } lru.add(tc[highest].ind[0]); lru.add(tc[highest].ind[1]); highest = lru.add(tc[highest].ind[2]); drawcalls++; } buf->setBoundingBox(mb->getBoundingBox()); newmesh->addMeshBuffer(buf); buf->drop(); } break; } delete [] vc; delete [] tc; } // for each meshbuffer return newmesh; }
void OptimizeFaces(const uint16* indexList, uint indexCount, uint vertexCount, uint16* newIndexList, uint16 lruCacheSize) { std::vector<OptimizeVertexData> vertexDataList; vertexDataList.resize(vertexCount); // compute face count per vertex for (uint i=0; i<indexCount; ++i) { uint16 index = indexList[i]; assert(index < vertexCount); OptimizeVertexData& vertexData = vertexDataList[index]; vertexData.activeFaceListSize++; } std::vector<uint> activeFaceList; const uint16 kEvictedCacheIndex = std::numeric_limits<uint16>::max(); { // allocate face list per vertex uint curActiveFaceListPos = 0; for (uint i=0; i<vertexCount; ++i) { OptimizeVertexData& vertexData = vertexDataList[i]; vertexData.cachePos0 = kEvictedCacheIndex; vertexData.cachePos1 = kEvictedCacheIndex; vertexData.activeFaceListStart = curActiveFaceListPos; curActiveFaceListPos += vertexData.activeFaceListSize; vertexData.score = FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos0, lruCacheSize); vertexData.activeFaceListSize = 0; } activeFaceList.resize(curActiveFaceListPos); } // fill out face list per vertex for (uint i=0; i<indexCount; i+=3) { for (uint j=0; j<3; ++j) { uint16 index = indexList[i+j]; OptimizeVertexData& vertexData = vertexDataList[index]; activeFaceList[vertexData.activeFaceListStart + vertexData.activeFaceListSize] = i; vertexData.activeFaceListSize++; } } std::vector<byte> processedFaceList; processedFaceList.resize(indexCount); uint16 vertexCacheBuffer[(kMaxVertexCacheSize+3)*2]; uint16* cache0 = vertexCacheBuffer; uint16* cache1 = vertexCacheBuffer+(kMaxVertexCacheSize+3); uint16 entriesInCache0 = 0; uint bestFace = 0; float bestScore = -1.f; const float maxValenceScore = FindVertexScore(1, kEvictedCacheIndex, lruCacheSize) * 3.f; for (uint i = 0; i < indexCount; i += 3) { if (bestScore < 0.f) { // no verts in the cache are used by any unprocessed faces so // search all unprocessed faces for a new starting point for (uint j = 0; j < indexCount; j += 3) { if (processedFaceList[j] == 0) { uint face = j; float faceScore = 0.f; for (uint k=0; k<3; ++k) { uint16 index = indexList[face+k]; OptimizeVertexData& vertexData = vertexDataList[index]; assert(vertexData.activeFaceListSize > 0); assert(vertexData.cachePos0 >= lruCacheSize); faceScore += vertexData.score; } if (faceScore > bestScore) { bestScore = faceScore; bestFace = face; assert(bestScore <= maxValenceScore); if (bestScore >= maxValenceScore) { break; } } } } assert(bestScore >= 0.f); } processedFaceList[bestFace] = 1; uint16 entriesInCache1 = 0; // add bestFace to LRU cache and to newIndexList for (uint v = 0; v < 3; ++v) { uint16 index = indexList[bestFace+v]; newIndexList[i+v] = index; OptimizeVertexData& vertexData = vertexDataList[index]; if (vertexData.cachePos1 >= entriesInCache1) { vertexData.cachePos1 = entriesInCache1; cache1[entriesInCache1++] = index; if (vertexData.activeFaceListSize == 1) { --vertexData.activeFaceListSize; continue; } } assert(vertexData.activeFaceListSize > 0); uint* begin = &activeFaceList[vertexData.activeFaceListStart]; uint* end = &activeFaceList[vertexData.activeFaceListStart + vertexData.activeFaceListSize]; uint* it = std::find(begin, end, bestFace); assert(it != end); std::swap(*it, *(end-1)); --vertexData.activeFaceListSize; vertexData.score = FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos1, lruCacheSize); } // move the rest of the old verts in the cache down and compute their new scores for (uint c0 = 0; c0 < entriesInCache0; ++c0) { uint16 index = cache0[c0]; OptimizeVertexData& vertexData = vertexDataList[index]; if (vertexData.cachePos1 >= entriesInCache1) { vertexData.cachePos1 = entriesInCache1; cache1[entriesInCache1++] = index; vertexData.score = FindVertexScore(vertexData.activeFaceListSize, vertexData.cachePos1, lruCacheSize); } } // find the best scoring triangle in the current cache (including up to 3 that were just evicted) bestScore = -1.f; for (uint c1 = 0; c1 < entriesInCache1; ++c1) { uint16 index = cache1[c1]; OptimizeVertexData& vertexData = vertexDataList[index]; vertexData.cachePos0 = vertexData.cachePos1; vertexData.cachePos1 = kEvictedCacheIndex; for (uint j=0; j<vertexData.activeFaceListSize; ++j) { uint face = activeFaceList[vertexData.activeFaceListStart+j]; float faceScore = 0.f; for (uint v=0; v<3; v++) { uint16 faceIndex = indexList[face+v]; OptimizeVertexData& faceVertexData = vertexDataList[faceIndex]; faceScore += faceVertexData.score; } if (faceScore > bestScore) { bestScore = faceScore; bestFace = face; } } } std::swap(cache0, cache1); entriesInCache0 = std::min(entriesInCache1, lruCacheSize); } }