void BVH4BuilderFast::build_sequential(size_t threadIndex, size_t threadCount) { /* start measurement */ double t0 = 0.0f; if (g_verbose >= 2) t0 = getSeconds(); /* initialize node and leaf allocator */ nodeAllocator.reset(); primAllocator.reset(); __aligned(64) Allocator nodeAlloc(nodeAllocator); __aligned(64) Allocator leafAlloc(primAllocator); /* create prim refs */ global_bounds.reset(); computePrimRefs(0,1); bvh->bounds = global_bounds.geometry; /* create initial build record */ BuildRecord br; br.init(global_bounds,0,numPrimitives); br.depth = 1; br.parentNode = (size_t)&bvh->root; /* build BVH in single thread */ recurseSAH(br,nodeAlloc,leafAlloc,RECURSE_SEQUENTIAL,threadIndex,threadCount); /* stop measurement */ if (g_verbose >= 2) dt = getSeconds()-t0; }
bool BVH4BuilderFast::splitParallel(BuildRecord ¤t, BuildRecord &leftChild, BuildRecord &rightChild, const size_t threadID, const size_t numThreads) { const unsigned int items = current.end - current.begin; assert(items >= BUILD_RECORD_SPLIT_THRESHOLD); /* mark as leaf if leaf threshold reached */ if (items <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) { current.createLeaf(); return false; } /* use primitive array temporarily for parallel splits */ PrimRef* tmp = (PrimRef*) primAllocator.base(); /* parallel binning of centroids */ g_state->parallelBinner.bin(current,prims,tmp,threadID,numThreads); /* find best split */ Split split; g_state->parallelBinner.best(split); /* if we cannot find a valid split, enforce an arbitrary split */ if (unlikely(split.pos == -1)) split_fallback(prims,current,leftChild,rightChild); /* parallel partitioning of items */ else g_state->parallelBinner.partition(tmp,prims,split,leftChild,rightChild,threadID,numThreads); if (leftChild.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) leftChild.createLeaf(); if (rightChild.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) rightChild.createLeaf(); return true; }
/*! Packs up to four triangles of the build record into a single Triangle4v leaf.
 *
 *  \param current   build record; its primitives prims[begin .. begin+size()) are packed
 *                   and the encoded leaf is written through current.parent
 *  \param leafAlloc allocator providing the storage for the Triangle4v
 *  \param threadID  not used by this implementation
 */
void BVH4Triangle4vBuilderFast::createSmallLeaf(BuildRecord& current, Allocator& leafAlloc, size_t threadID)
{
  const size_t count = current.size();
  const size_t first = current.begin;
  assert(count<=4);

  /* one Triangle4v holds the whole leaf, hence the leaf is encoded with a count of 1 */
  Triangle4v* leaf = (Triangle4v*) leafAlloc.malloc(sizeof(Triangle4v));
  *current.parent = bvh->encodeLeaf((char*)leaf,1);

  /* lanes that receive no triangle keep -1 IDs/mask and zero vertices */
  ssei vgeomID = -1;
  ssei vprimID = -1;
  ssei vmask   = -1;
  sse3f v0 = zero;
  sse3f v1 = zero;
  sse3f v2 = zero;

  for (size_t lane=0; lane<count; lane++)
  {
    const size_t ref    = first+lane;
    const size_t geomID = prims[ref].geomID();
    const size_t primID = prims[ref].primID();
    const TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(geomID);
    const TriangleMesh::Triangle& tri = mesh->triangle(primID);
    const Vec3fa& p0 = mesh->vertex(tri.v[0]);
    const Vec3fa& p1 = mesh->vertex(tri.v[1]);
    const Vec3fa& p2 = mesh->vertex(tri.v[2]);

    /* vertex positions, stored per component */
    v0.x[lane] = p0.x; v0.y[lane] = p0.y; v0.z[lane] = p0.z;
    v1.x[lane] = p1.x; v1.y[lane] = p1.y; v1.z[lane] = p1.z;
    v2.x[lane] = p2.x; v2.y[lane] = p2.y; v2.z[lane] = p2.z;

    /* identifiers and ray mask of the triangle */
    vgeomID[lane] = geomID;
    vprimID[lane] = primID;
    vmask  [lane] = mesh->mask;
  }

  Triangle4v::store_nt(leaf,Triangle4v(v0,v1,v2,vgeomID,vprimID,vmask));
}
/*! Creates a leaf consisting of one Triangle1v per primitive of the build record.
 *
 *  \param current   build record; its primitives prims[begin .. begin+size()) are converted
 *                   and the encoded leaf is written through current.parent
 *  \param leafAlloc allocator providing the storage for the Triangle1v array
 *  \param threadID  not used by this implementation
 *
 *  Only the three vertices plus primID/geomID/mask are written; no edge vectors or
 *  normal are stored by this function, so none are computed.
 */
void BVH4Triangle1vBuilderFast::createSmallLeaf(BuildRecord& current, Allocator& leafAlloc, size_t threadID)
{
  size_t items = current.size();
  size_t start = current.begin;
  assert(items<=4);

  /* allocate leaf node */
  Triangle1v* accel = (Triangle1v*) leafAlloc.malloc(items*sizeof(Triangle1v));
  *current.parent = bvh->encodeLeaf((char*)accel,items);

  for (size_t i=0; i<items; i++)
  {
    const size_t geomID = prims[start+i].geomID();
    const size_t primID = prims[start+i].primID();
    const TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(geomID);
    const TriangleMesh::Triangle& tri = mesh->triangle(primID);

    /* load the vertices; select(0x7,..,zero) presumably keeps x,y,z and clears lane 3 -- confirm */
    const ssef v0 = select(0x7,(ssef)mesh->vertex(tri.v[0]),zero);
    const ssef v1 = select(0x7,(ssef)mesh->vertex(tri.v[1]),zero);
    const ssef v2 = select(0x7,(ssef)mesh->vertex(tri.v[2]),zero);

    /* lane 3 of v0/v1/v2 carries primID, geomID and the mesh mask, respectively */
    store4f_nt(&accel[i].v0,cast(insert<3>(cast(v0),primID)));
    store4f_nt(&accel[i].v1,cast(insert<3>(cast(v1),geomID)));
    store4f_nt(&accel[i].v2,cast(insert<3>(cast(v2),mesh->mask)));
  }
}
/*! Selects the split routine for a build record: the parallel split is used only in
 *  BUILD_TOP_LEVEL mode for records with at least BUILD_RECORD_SPLIT_THRESHOLD items,
 *  every other case goes through the sequential split. */
__forceinline void BVH4BuilderTopLevel::split(BuildRecord& current, BuildRecord& left, BuildRecord& right, const size_t mode, const size_t threadID, const size_t numThreads)
{
  /* outside top-level mode, or for small records, split on the calling thread */
  if (mode != BUILD_TOP_LEVEL || current.items() < BUILD_RECORD_SPLIT_THRESHOLD)
    return split_sequential(current,left,right);

  return split_parallel(current,left,right,threadID,numThreads);
}
/*! Continues the build for a child record.
 *
 *  In BUILD_TOP_LEVEL mode the record is only pushed onto the global work stack.
 *  Otherwise a record with more than THRESHOLD_FOR_SUBTREE_RECURSION items is offered
 *  to the calling thread's work stack; if it is small, or the push fails, the subtree
 *  is built immediately via recurseSAH. */
__forceinline void BVH4BuilderTopLevel::recurse(size_t depth, BuildRecord& current, const size_t mode, const size_t threadID, const size_t numThreads)
{
  if (mode == BUILD_TOP_LEVEL) {
    g_state->global_workStack.push_nolock(current);
    return;
  }

  /* the push is attempted for large records only */
  const bool deferred =
    current.items() > THRESHOLD_FOR_SUBTREE_RECURSION &&
    g_state->thread_workStack[threadID].push(current);

  if (!deferred)
    recurseSAH(depth,current,RECURSE,threadID,numThreads);
}
/*! Continues the build for a child record, depending on the build mode.
 *
 *  - BUILD_TOP_LEVEL: the record is pushed onto the global work stack.
 *  - RECURSE_PARALLEL with more than THRESHOLD_FOR_SUBTREE_RECURSION items: the record
 *    is offered to the calling thread's stack; if that push fails the subtree is built
 *    right away in RECURSE_SEQUENTIAL mode.
 *  - everything else: the subtree is built right away in the current mode. */
__forceinline void BVH4BuilderFast::recurse(BuildRecord& current, Allocator& nodeAlloc, Allocator& leafAlloc, const size_t mode, const size_t threadID, const size_t numThreads)
{
  if (mode == BUILD_TOP_LEVEL) {
    g_state->workStack.push_nolock(current);
    return;
  }

  const bool largeParallelTask =
    (mode == RECURSE_PARALLEL) && (current.items() > THRESHOLD_FOR_SUBTREE_RECURSION);

  if (!largeParallelTask) {
    recurseSAH(current,nodeAlloc,leafAlloc,mode,threadID,numThreads);
    return;
  }

  /* a successfully pushed record is picked up later from the thread stack */
  if (g_state->threadStack[threadID].push(current))
    return;

  recurseSAH(current,nodeAlloc,leafAlloc,RECURSE_SEQUENTIAL,threadID,numThreads);
}
/*! Splits a build record on the calling thread using a 16-bin binner.
 *
 *  \param current    record to split; marked as a leaf when it holds at most
 *                    QBVH_BUILDER_LEAF_ITEM_THRESHOLD items
 *  \param leftChild  receives the left part of the split
 *  \param rightChild receives the right part of the split
 *  \return false if 'current' was turned into a leaf, true if both children were filled in
 */
bool BVH4BuilderFast::splitSequential(BuildRecord& current, BuildRecord& leftChild, BuildRecord& rightChild)
{
  /* nothing to split once the leaf threshold is reached */
  if (current.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD)
  {
    current.createLeaf();
    return false;
  }

  /* bin the primitives of the record using a mapping derived from its bounds */
  Mapping<16> binMapping(current.bounds);
  Binner<16> centroidBinner;
  centroidBinner.bin(prims,current.begin,current.end,binMapping);

  /* query the best split found by the binner */
  Split bestSplit;
  centroidBinner.best(bestSplit,binMapping);

  if (unlikely(bestSplit.pos == -1))
  {
    /* no valid split position: fall back to an arbitrary split */
    split_fallback(prims,current,leftChild,rightChild);
  }
  else
  {
    /* partition the primitives according to the split */
    centroidBinner.partition(prims,current.begin,current.end,bestSplit,binMapping,leftChild,rightChild);
  }

  /* flag children that are already small enough as leaves */
  if (leftChild.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD)
    leftChild.createLeaf();
  if (rightChild.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD)
    rightChild.createLeaf();

  return true;
}
/*! Creates a leaf of AccelSetItem entries, one per primitive of the build record.
 *
 *  \param current   build record; its primitives prims[begin .. begin+size()) are converted
 *                   and the encoded leaf is written through current.parent
 *  \param leafAlloc allocator providing the storage for the AccelSetItem array
 *  \param threadID  not used by this implementation
 */
void BVH4UserGeometryBuilderFast::createSmallLeaf(BuildRecord& current, Allocator& leafAlloc, size_t threadID)
{
  const size_t count = current.size();
  const size_t first = current.begin;

  /* reserve one entry per primitive and hook the leaf into its parent */
  AccelSetItem* leaf = (AccelSetItem*) leafAlloc.malloc(count*sizeof(AccelSetItem));
  *current.parent = bvh->encodeLeaf(leaf,count);

  for (size_t k=0; k<count; k++)
  {
    const PrimRef& ref = prims[first+k];
    AccelSetItem& entry = leaf[k];

    /* the geometry is looked up in the scene by geomID; the item index is the primID */
    entry.accel = (AccelSet*) (UserGeometryBase*) scene->get(ref.geomID());
    entry.item  = ref.primID();
  }
}
/*! Creates a leaf of Bezier1i primitives, one per primitive of the build record.
 *
 *  \param current   build record; its primitives prims[begin .. begin+size()) are converted
 *                   and the encoded leaf is written through current.parent
 *  \param leafAlloc allocator providing the storage for the Bezier1i array
 *  \param threadID  not used by this implementation
 */
void BVH4Bezier1iBuilderFast::createSmallLeaf(BuildRecord& current, Allocator& leafAlloc, size_t threadID)
{
  const size_t count = current.size();
  const size_t first = current.begin;

  /* reserve the leaf storage and hook it into the parent */
  Bezier1i* leaf = (Bezier1i*) leafAlloc.malloc(count*sizeof(Bezier1i));
  *current.parent = bvh->encodeLeaf((char*)leaf,count);

  for (size_t k=0; k<count; k++)
  {
    const size_t ref    = first+k;
    const size_t geomID = prims[ref].geomID();
    const size_t primID = prims[ref].primID();
    const BezierCurves* curves = scene->getBezierCurves(geomID);

    /* the Bezier1i is constructed in place from the address of the vertex
       returned for curve(primID), together with both IDs */
    const Vec3fa& p0 = curves->vertex(curves->curve(primID));
    new (leaf+k) Bezier1i(&p0,geomID,primID);
  }
}
/*! Packs up to four triangles of the build record into a single indexed Triangle4i leaf.
 *
 *  \param current   build record; its primitives prims[begin .. begin+size()) are packed
 *                   and the encoded leaf is written through current.parent
 *  \param leafAlloc allocator providing the storage for the Triangle4i
 *  \param threadID  not used by this implementation
 */
void BVH4Triangle4iBuilderFast::createSmallLeaf(BuildRecord& current, Allocator& leafAlloc, size_t threadID)
{
  const size_t count = current.size();
  const size_t first = current.begin;
  assert(count<=4);

  /* one Triangle4i holds the whole leaf, hence the leaf is encoded with a count of 1 */
  Triangle4i* leaf = (Triangle4i*) leafAlloc.malloc(sizeof(Triangle4i));
  *current.parent = bvh->encodeLeaf((char*)leaf,1);

  ssei geomID = -1;
  ssei primID = -1;
  Vec3f* v0[4] = { NULL, NULL, NULL, NULL };
  ssei v1 = zero;
  ssei v2 = zero;

  /* occupied lanes: pointer to the first vertex, other two vertices as int offsets from it */
  for (size_t lane=0; lane<count; lane++)
  {
    const PrimRef& ref = prims[first+lane];
    const TriangleMesh* mesh = scene->getTriangleMesh(ref.geomID());
    const TriangleMesh::Triangle& tri = mesh->triangle(ref.primID());

    geomID[lane] = ref.geomID();
    primID[lane] = ref.primID();
    v0[lane] = (Vec3f*) &mesh->vertex(tri.v[0]);

    int* const base = (int*) v0[lane];
    v1[lane] = (int*)&mesh->vertex(tri.v[1]) - base;
    v2[lane] = (int*)&mesh->vertex(tri.v[2]) - base;
  }

  /* unused lanes: invalid IDs, first lane's base pointer, zero offsets */
  for (size_t lane=count; lane<4; lane++)
  {
    geomID[lane] = -1;
    primID[lane] = -1;
    v0[lane] = v0[0];
    v1[lane] = 0;
    v2[lane] = 0;
  }

  new (leaf) Triangle4i(v0,v1,v2,geomID,primID);
}
void BVH4BuilderFast::createLeaf(BuildRecord& current, Allocator& nodeAlloc, Allocator& leafAlloc, size_t threadIndex, size_t threadCount) { #if defined(DEBUG) if (current.depth > BVH4::maxBuildDepthLeaf) throw std::runtime_error("ERROR: depth limit reached"); #endif /* create leaf for few primitives */ if (current.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) { createSmallLeaf(this,current,leafAlloc,threadIndex); return; } /* first split level */ BuildRecord record0, record1; split_fallback(prims,current,record0,record1); /* second split level */ BuildRecord children[4]; split_fallback(prims,record0,children[0],children[1]); split_fallback(prims,record1,children[2],children[3]); /* allocate node */ Node* node = (Node*) nodeAlloc.malloc(sizeof(Node)); node->clear(); *(NodeRef*)current.parentNode = bvh->encodeNode(node); /* recurse into each child */ for (size_t i=0; i<4; i++) { node->set(i,children[i].bounds.geometry); children[i].parentNode = (size_t)&node->child(i); children[i].depth = current.depth+1; createLeaf(children[i],nodeAlloc,leafAlloc,threadIndex,threadCount); } BVH4::compact(node); // move empty nodes to the end }
void BVH4BuilderTopLevel::build_toplevel(size_t threadIndex, size_t threadCount) { /* calculate scene bounds */ Centroid_Scene_AABB bounds; bounds.reset(); for (size_t i=0; i<threadCount; i++) bounds.extend(g_state->thread_bounds[i]); /* ignore empty scenes */ //bvh->clear(); bvh->bounds = bounds.geometry; refs.resize(nextRef); if (refs.size() == 0) return; double t0 = 0.0; if (g_verbose >= 2) { std::cout << "building BVH4<" << bvh->primTy.name << "> with toplevel SAH builder ... " << std::flush; t0 = getSeconds(); } /* open all large nodes */ #if 0 open_sequential(); refs1.resize(refs.size()); #else global_dest = refs.size(); size_t M = max(size_t(2*global_dest),size_t(MIN_OPEN_SIZE)); refs .resize(M); refs1.resize(M); barrier.init(threadCount); TaskScheduler::executeTask(threadIndex,threadCount,_task_open_parallel,this,threadCount,"toplevel_open_parallel"); refs.resize(global_dest); #endif bvh->init(refs.size()); /* start toplevel build */ BuildRecord task; task.init(bounds,0,refs.size()); task.parentNode = (size_t)&bvh->root; task.depth = 1; /* initialize thread-local work stacks */ for (size_t i=0; i<threadCount; i++) g_state->thread_workStack[i].reset(); /* push initial build record to global work stack */ g_state->global_workStack.reset(); g_state->global_workStack.push_nolock(task); /* work in multithreaded toplevel mode until sufficient subtasks got generated */ while (g_state->global_workStack.size() < 4*threadCount && g_state->global_workStack.size()+BVH4::N <= SIZE_WORK_STACK) { BuildRecord br; if (!g_state->global_workStack.pop_nolock_largest(br)) break; recurseSAH(0,br,BUILD_TOP_LEVEL,threadIndex,threadCount); } /* now process all created subtasks on multiple threads */ TaskScheduler::executeTask(threadIndex,threadCount,_task_build_subtrees,this,threadCount,"toplevel_build_subtrees"); if (g_verbose >= 2) { double t1 = getSeconds(); std::cout << "[DONE]" << std::endl; std::cout << " dt = " << 1000.0f*(t1-t0) << "ms" << std::endl; std::cout << 
BVH4Statistics(bvh).str(); } }
void BVH4BuilderFast::build_parallel(size_t threadIndex, size_t threadCount, size_t taskIndex, size_t taskCount, TaskScheduler::Event* event) { /* wait for all threads to enter */ g_state->barrier.wait(threadIndex,threadCount); /* start measurement */ double t0 = 0.0f; if (g_verbose >= 2) t0 = getSeconds(); /* all worker threads enter tasking system */ if (threadIndex != 0) { g_state->scheduler.dispatchTaskMainLoop(threadIndex,threadCount); return; } /* calculate list of primrefs */ global_bounds.reset(); g_state->scheduler.dispatchTask( task_computePrimRefs, this, threadIndex, threadCount ); bvh->bounds = global_bounds.geometry; /* initialize node and leaf allocator */ nodeAllocator.reset(); primAllocator.reset(); __aligned(64) Allocator nodeAlloc(nodeAllocator); __aligned(64) Allocator leafAlloc(primAllocator); /* create initial build record */ BuildRecord br; br.init(global_bounds,0,numPrimitives); br.depth = 1; br.parentNode = (size_t)&bvh->root; /* initialize thread-local work stacks */ for (size_t i=0; i<threadCount; i++) g_state->threadStack[i].reset(); /* push initial build record to global work stack */ g_state->workStack.reset(); g_state->workStack.push_nolock(br); /* work in multithreaded toplevel mode until sufficient subtasks got generated */ while (g_state->workStack.size() < 4*threadCount && g_state->workStack.size()+BVH4::N <= SIZE_WORK_STACK) { BuildRecord br; /* pop largest item for better load balancing */ if (!g_state->workStack.pop_nolock_largest(br)) break; /* guarantees to create no leaves in this stage */ if (br.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) break; recurseSAH(br,nodeAlloc,leafAlloc,BUILD_TOP_LEVEL,threadIndex,threadCount); } /* now process all created subtasks on multiple threads */ g_state->scheduler.dispatchTask(task_buildSubTrees, this, threadIndex, threadCount ); /* release all threads again */ g_state->scheduler.releaseThreads(threadCount); /* stop measurement */ if (g_verbose >= 2) dt = getSeconds()-t0; }
/*! Builds the node for one build record and continues with its children.
 *
 *  The record is repeatedly split (always the non-leaf child with the largest
 *  sceneArea()) until BVH4::N children exist or nothing can be split any more.
 *  A node is then allocated, linked through current.parentNode, and recurse() is
 *  called for every child. Records at depth >= BVH4::maxBuildDepth, records already
 *  flagged as leaf, and records that could not be split at all go to createLeaf().
 *
 *  \param current    record to process
 *  \param nodeAlloc  allocator for inner nodes
 *  \param leafAlloc  allocator for leaf data
 *  \param mode       build mode, forwarded to split() and recurse()
 *  \param threadID   index of the calling thread
 *  \param numThreads number of threads
 */
void BVH4BuilderFast::recurseSAH(BuildRecord& current, Allocator& nodeAlloc, Allocator& leafAlloc, const size_t mode, const size_t threadID, const size_t numThreads)
{
  __aligned(64) BuildRecord children[BVH4::N];

  /* create leaf node; leaves are not expected while in top-level mode */
  if (current.depth >= BVH4::maxBuildDepth || current.isLeaf()) {
    assert(mode != BUILD_TOP_LEVEL);
    createLeaf(current,nodeAlloc,leafAlloc,threadID,numThreads);
    return;
  }

  /* fill up to BVH4::N children by always splitting the one with the largest surface area */
  unsigned int numChildren = 1;
  children[0] = current;

  do {

    /* find best child with largest bounding box area */
    int bestChild = -1;
    float bestArea = neg_inf;
    for (unsigned int i=0; i<numChildren; i++)
    {
      /* ignore leaves as they cannot get split */
      if (children[i].isLeaf())
        continue;

      /* remember child with largest area */
      if (children[i].sceneArea() > bestArea) {
        bestArea = children[i].sceneArea();
        bestChild = i;
      }
    }

    /* every child is a leaf: nothing left to split */
    if (bestChild == -1) break;

    /*! split best child into left and right child */
    __aligned(64) BuildRecord left, right;

    /* 'continue' jumps to the loop condition with numChildren unchanged, so the search
       runs again; presumably split() flagged the child as a leaf when returning false
       (as splitSequential/splitParallel do), so it is skipped next time -- confirm */
    if (!split(children[bestChild],left,right,mode,threadID,numThreads))
      continue;

    /* add new children left and right: the last child moves into the freed slot,
       'left' takes the last slot and 'right' is appended behind it */
    left.depth = right.depth = current.depth+1;
    children[bestChild] = children[numChildren-1];
    children[numChildren-1] = left;
    children[numChildren+0] = right;
    numChildren++;

  } while (numChildren < BVH4::N);

  /* create leaf node if no split is possible */
  if (numChildren == 1) {
    assert(mode != BUILD_TOP_LEVEL);
    createLeaf(current,nodeAlloc,leafAlloc,threadID,numThreads);
    return;
  }

  /* allocate node and store its encoded reference in the slot given by current.parentNode */
  Node* node = (Node*) nodeAlloc.malloc(sizeof(Node));
  node->clear();
  *(NodeRef*)current.parentNode = bvh->encodeNode(node);

  /* recurse into each child; each child writes its result into its own slot of 'node' */
  for (unsigned int i=0; i<numChildren; i++) {
    node->set(i,children[i].bounds.geometry);
    children[i].parentNode = (size_t)&node->child(i);
    children[i].depth = current.depth+1;
    recurse(children[i],nodeAlloc,leafAlloc,mode,threadID,numThreads);
  }
}