bool BVH4BuilderFast::splitParallel(BuildRecord ¤t, BuildRecord &leftChild, BuildRecord &rightChild, const size_t threadID, const size_t numThreads) { const unsigned int items = current.end - current.begin; assert(items >= BUILD_RECORD_SPLIT_THRESHOLD); /* mark as leaf if leaf threshold reached */ if (items <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) { current.createLeaf(); return false; } /* use primitive array temporarily for parallel splits */ PrimRef* tmp = (PrimRef*) primAllocator.base(); /* parallel binning of centroids */ g_state->parallelBinner.bin(current,prims,tmp,threadID,numThreads); /* find best split */ Split split; g_state->parallelBinner.best(split); /* if we cannot find a valid split, enforce an arbitrary split */ if (unlikely(split.pos == -1)) split_fallback(prims,current,leftChild,rightChild); /* parallel partitioning of items */ else g_state->parallelBinner.partition(tmp,prims,split,leftChild,rightChild,threadID,numThreads); if (leftChild.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) leftChild.createLeaf(); if (rightChild.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) rightChild.createLeaf(); return true; }
/* Single-threaded split of 'current' into 'leftChild' and 'rightChild'
 * using a local 16-bin binner.
 *
 * Returns false, after marking 'current' as a leaf, when it holds no more
 * than QBVH_BUILDER_LEAF_ITEM_THRESHOLD items. Otherwise returns true with
 * both children filled in; children at or below the leaf threshold are
 * marked as leaves. */
bool BVH4BuilderFast::splitSequential(BuildRecord& current, BuildRecord& leftChild, BuildRecord& rightChild)
{
  /* nothing to split: the record itself becomes a leaf */
  if (current.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD)
  {
    current.createLeaf();
    return false;
  }

  /* bin the centroids of [begin,end) with a mapping derived from the record bounds */
  Mapping<16> mapping(current.bounds);
  Binner<16> binner;
  binner.bin(prims,current.begin,current.end,mapping);

  /* query the best split found by the binner */
  Split split;
  binner.best(split,mapping);

  if (unlikely(split.pos == -1))
  {
    /* no valid split position: fall back to an arbitrary split */
    split_fallback(prims,current,leftChild,rightChild);
  }
  else
  {
    /* partition the items according to the chosen split */
    binner.partition(prims, current.begin, current.end, split, mapping, leftChild, rightChild);
  }

  /* turn sufficiently small children into leaves, left one first */
  BuildRecord* const children[2] = { &leftChild, &rightChild };
  for (size_t c=0; c<2; c++)
  {
    if (children[c]->items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD)
      children[c]->createLeaf();
  }
  return true;
}
/* Dispatches to the parallel or the sequential split routine.
 * The parallel variant is chosen only in BUILD_TOP_LEVEL mode and only when
 * 'current' holds at least BUILD_RECORD_SPLIT_THRESHOLD items; every other
 * case goes through split_sequential. */
__forceinline void BVH4BuilderTopLevel::split(BuildRecord& current, BuildRecord& left, BuildRecord& right, const size_t mode, const size_t threadID, const size_t numThreads)
{
  const bool useParallelSplit = (mode == BUILD_TOP_LEVEL) && (current.items() >= BUILD_RECORD_SPLIT_THRESHOLD);

  if (!useParallelSplit)
    return split_sequential(current,left,right);

  return split_parallel(current,left,right,threadID,numThreads);
}
void BVH4BuilderFast::createTriangle4vLeaf(const BVH4BuilderFast* This, BuildRecord& current, Allocator& leafAlloc, size_t threadID) { size_t items = current.items(); size_t start = current.begin; assert(items<=4); /* allocate leaf node */ Triangle4v* accel = (Triangle4v*) leafAlloc.malloc(sizeof(Triangle4v)); *(NodeRef*)current.parentNode = This->bvh->encodeLeaf((char*)accel,1); ssei vgeomID = -1, vprimID = -1, vmask = -1; sse3f v0 = zero, v1 = zero, v2 = zero; for (size_t i=0; i<items; i++) { const size_t geomID = This->prims[start+i].geomID(); const size_t primID = This->prims[start+i].primID(); const TriangleMesh* __restrict__ const mesh = This->scene->getTriangleMesh(geomID); const TriangleMesh::Triangle& tri = mesh->triangle(primID); const Vec3fa& p0 = mesh->vertex(tri.v[0]); const Vec3fa& p1 = mesh->vertex(tri.v[1]); const Vec3fa& p2 = mesh->vertex(tri.v[2]); vgeomID [i] = geomID; vprimID [i] = primID; vmask [i] = mesh->mask; v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z; v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z; v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z; } Triangle4v::store_nt(accel,Triangle4v(v0,v1,v2,vgeomID,vprimID,vmask)); }
void BVH4BuilderFast::createTriangle1vLeaf(const BVH4BuilderFast* This, BuildRecord& current, Allocator& leafAlloc, size_t threadID) { size_t items = current.items(); size_t start = current.begin; assert(items<=4); /* allocate leaf node */ Triangle1v* accel = (Triangle1v*) leafAlloc.malloc(items*sizeof(Triangle1v)); *(NodeRef*)current.parentNode = This->bvh->encodeLeaf((char*)accel,items); for (size_t i=0; i<items; i++) { const size_t geomID = This->prims[start+i].geomID(); const size_t primID = This->prims[start+i].primID(); const TriangleMesh* __restrict__ const mesh = This->scene->getTriangleMesh(geomID); const TriangleMesh::Triangle& tri = mesh->triangle(primID); const ssef v0 = select(0x7,(ssef)mesh->vertex(tri.v[0]),zero); const ssef v1 = select(0x7,(ssef)mesh->vertex(tri.v[1]),zero); const ssef v2 = select(0x7,(ssef)mesh->vertex(tri.v[2]),zero); const ssef e1 = v0 - v1; const ssef e2 = v2 - v0; const ssef normal = cross(e1,e2); store4f_nt(&accel[i].v0,cast(insert<3>(cast(v0),primID))); store4f_nt(&accel[i].v1,cast(insert<3>(cast(v1),geomID))); store4f_nt(&accel[i].v2,cast(insert<3>(cast(v2),mesh->mask))); } }
/* Decides how 'current' is processed next:
 *  - in BUILD_TOP_LEVEL mode it is pushed onto the global work stack;
 *  - otherwise, a record with more than THRESHOLD_FOR_SUBTREE_RECURSION
 *    items is first offered to the work stack of thread 'threadID';
 *  - if it is not offered, or the push fails, it is built right here through
 *    recurseSAH in RECURSE mode. */
__forceinline void BVH4BuilderTopLevel::recurse(size_t depth, BuildRecord& current, const size_t mode, const size_t threadID, const size_t numThreads)
{
  if (mode == BUILD_TOP_LEVEL)
  {
    g_state->global_workStack.push_nolock(current);
    return;
  }

  /* push() is attempted for large records only (short-circuit evaluation) */
  const bool queued = current.items() > THRESHOLD_FOR_SUBTREE_RECURSION
                   && g_state->thread_workStack[threadID].push(current);

  if (!queued)
    recurseSAH(depth,current,RECURSE,threadID,numThreads);
}
/* Decides how 'current' is processed next:
 *  - in BUILD_TOP_LEVEL mode it is pushed onto the global work stack;
 *  - in RECURSE_PARALLEL mode, a record with more than
 *    THRESHOLD_FOR_SUBTREE_RECURSION items is offered to the stack of thread
 *    'threadID'; if that push fails it is built immediately with
 *    RECURSE_SEQUENTIAL;
 *  - in every other case recurseSAH is called with the unchanged 'mode'. */
__forceinline void BVH4BuilderFast::recurse(BuildRecord& current, Allocator& nodeAlloc, Allocator& leafAlloc, const size_t mode, const size_t threadID, const size_t numThreads)
{
  if (mode == BUILD_TOP_LEVEL)
  {
    g_state->workStack.push_nolock(current);
    return;
  }

  const bool offerToThreadStack = (mode == RECURSE_PARALLEL) && (current.items() > THRESHOLD_FOR_SUBTREE_RECURSION);
  if (!offerToThreadStack)
  {
    recurseSAH(current,nodeAlloc,leafAlloc,mode,threadID,numThreads);
    return;
  }

  /* the push did not succeed: continue sequentially on this thread */
  if (!g_state->threadStack[threadID].push(current))
    recurseSAH(current,nodeAlloc,leafAlloc,RECURSE_SEQUENTIAL,threadID,numThreads);
}
void BVH4BuilderFast::createLeaf(BuildRecord& current, Allocator& nodeAlloc, Allocator& leafAlloc, size_t threadIndex, size_t threadCount) { #if defined(DEBUG) if (current.depth > BVH4::maxBuildDepthLeaf) throw std::runtime_error("ERROR: depth limit reached"); #endif /* create leaf for few primitives */ if (current.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) { createSmallLeaf(this,current,leafAlloc,threadIndex); return; } /* first split level */ BuildRecord record0, record1; split_fallback(prims,current,record0,record1); /* second split level */ BuildRecord children[4]; split_fallback(prims,record0,children[0],children[1]); split_fallback(prims,record1,children[2],children[3]); /* allocate node */ Node* node = (Node*) nodeAlloc.malloc(sizeof(Node)); node->clear(); *(NodeRef*)current.parentNode = bvh->encodeNode(node); /* recurse into each child */ for (size_t i=0; i<4; i++) { node->set(i,children[i].bounds.geometry); children[i].parentNode = (size_t)&node->child(i); children[i].depth = current.depth+1; createLeaf(children[i],nodeAlloc,leafAlloc,threadIndex,threadCount); } BVH4::compact(node); // move empty nodes to the end }
void BVH4BuilderFast::build_parallel(size_t threadIndex, size_t threadCount, size_t taskIndex, size_t taskCount, TaskScheduler::Event* event) { /* wait for all threads to enter */ g_state->barrier.wait(threadIndex,threadCount); /* start measurement */ double t0 = 0.0f; if (g_verbose >= 2) t0 = getSeconds(); /* all worker threads enter tasking system */ if (threadIndex != 0) { g_state->scheduler.dispatchTaskMainLoop(threadIndex,threadCount); return; } /* calculate list of primrefs */ global_bounds.reset(); g_state->scheduler.dispatchTask( task_computePrimRefs, this, threadIndex, threadCount ); bvh->bounds = global_bounds.geometry; /* initialize node and leaf allocator */ nodeAllocator.reset(); primAllocator.reset(); __aligned(64) Allocator nodeAlloc(nodeAllocator); __aligned(64) Allocator leafAlloc(primAllocator); /* create initial build record */ BuildRecord br; br.init(global_bounds,0,numPrimitives); br.depth = 1; br.parentNode = (size_t)&bvh->root; /* initialize thread-local work stacks */ for (size_t i=0; i<threadCount; i++) g_state->threadStack[i].reset(); /* push initial build record to global work stack */ g_state->workStack.reset(); g_state->workStack.push_nolock(br); /* work in multithreaded toplevel mode until sufficient subtasks got generated */ while (g_state->workStack.size() < 4*threadCount && g_state->workStack.size()+BVH4::N <= SIZE_WORK_STACK) { BuildRecord br; /* pop largest item for better load balancing */ if (!g_state->workStack.pop_nolock_largest(br)) break; /* guarantees to create no leaves in this stage */ if (br.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) break; recurseSAH(br,nodeAlloc,leafAlloc,BUILD_TOP_LEVEL,threadIndex,threadCount); } /* now process all created subtasks on multiple threads */ g_state->scheduler.dispatchTask(task_buildSubTrees, this, threadIndex, threadCount ); /* release all threads again */ g_state->scheduler.releaseThreads(threadCount); /* stop measurement */ if (g_verbose >= 2) dt = getSeconds()-t0; }