void BVH4BuilderFast::buildSubTrees(const size_t threadID, const size_t numThreads) { __aligned(64) Allocator nodeAlloc(nodeAllocator); __aligned(64) Allocator leafAlloc(primAllocator); while (true) { BuildRecord br; if (!g_state->workStack.pop_largest(br)) // FIXME: might loose threads during build { /* global work queue empty => try to steal from neighboring queues */ bool success = false; for (size_t i=0; i<numThreads; i++) { if (g_state->threadStack[(threadID+i)%numThreads].pop_smallest(br)) { success = true; break; } } /* found nothing to steal ? */ if (!success) break; } /* process local work queue */ recurseSAH(br,nodeAlloc,leafAlloc,RECURSE_PARALLEL,threadID,numThreads); while (g_state->threadStack[threadID].pop_largest(br)) recurseSAH(br,nodeAlloc,leafAlloc,RECURSE_PARALLEL,threadID,numThreads); } }
/*! Decides how a child build record is continued.
 *
 *  - BUILD_TOP_LEVEL: the record is appended to the global work stack
 *    via push_nolock and nothing is built here.
 *  - otherwise, a record with more than THRESHOLD_FOR_SUBTREE_RECURSION
 *    items is pushed onto the calling thread's work stack; if that push
 *    fails, or if the record is not above the threshold, it is built
 *    immediately through recurseSAH in RECURSE mode.
 *
 *  \param depth      depth value forwarded to recurseSAH
 *  \param current    build record to continue with
 *  \param mode       current build mode
 *  \param threadID   index of the calling thread; selects its work stack
 *  \param numThreads thread count forwarded to recurseSAH
 */
__forceinline void BVH4BuilderTopLevel::recurse(size_t depth, BuildRecord& current, const size_t mode, const size_t threadID, const size_t numThreads)
{
  /* top-level phase: only collect records */
  if (mode == BUILD_TOP_LEVEL)
  {
    g_state->global_workStack.push_nolock(current);
    return;
  }

  /* large records are deferred to the thread's work stack when the push succeeds */
  const bool deferred =
    current.items() > THRESHOLD_FOR_SUBTREE_RECURSION &&
    g_state->thread_workStack[threadID].push(current);

  /* small record, or push failed: build it right away */
  if (!deferred)
    recurseSAH(depth,current,RECURSE,threadID,numThreads);
}
/*! Decides how a child build record is continued.
 *
 *  - BUILD_TOP_LEVEL: the record is appended to the shared work stack
 *    via push_nolock and nothing is built here.
 *  - RECURSE_PARALLEL with more than THRESHOLD_FOR_SUBTREE_RECURSION
 *    items: the record is pushed onto the calling thread's stack; if
 *    the push fails it is built immediately in RECURSE_SEQUENTIAL mode.
 *  - everything else: the record is built immediately in the mode that
 *    was passed in.
 *
 *  \param current    build record to continue with
 *  \param nodeAlloc  allocator forwarded to recurseSAH for nodes
 *  \param leafAlloc  allocator forwarded to recurseSAH for leaves
 *  \param mode       current build mode
 *  \param threadID   index of the calling thread; selects its stack
 *  \param numThreads thread count forwarded to recurseSAH
 */
__forceinline void BVH4BuilderFast::recurse(BuildRecord& current, Allocator& nodeAlloc, Allocator& leafAlloc, const size_t mode, const size_t threadID, const size_t numThreads)
{
  /* top-level phase: only collect records */
  if (mode == BUILD_TOP_LEVEL)
  {
    g_state->workStack.push_nolock(current);
    return;
  }

  /* parallel phase: defer large records to the thread's own stack */
  if (mode == RECURSE_PARALLEL && current.items() > THRESHOLD_FOR_SUBTREE_RECURSION)
  {
    const bool pushed = g_state->threadStack[threadID].push(current);

    /* push failed => finish this subtree sequentially right here */
    if (!pushed)
      recurseSAH(current,nodeAlloc,leafAlloc,RECURSE_SEQUENTIAL,threadID,numThreads);
    return;
  }

  /* small record or sequential mode: keep building in the given mode */
  recurseSAH(current,nodeAlloc,leafAlloc,mode,threadID,numThreads);
}
void BVH4BuilderTopLevel::task_build_subtrees(size_t threadIndex, size_t threadCount, size_t taskIndex, size_t taskCount, TaskScheduler::Event* event) { while (true) { BuildRecord br; if (!g_state->global_workStack.pop_largest(br)) // FIXME: might loose threads during build { /* global work queue empty => try to steal from neighboring queues */ bool success = false; for (size_t i=0; i<threadCount; i++) { if (g_state->thread_workStack[(threadIndex+i)%threadCount].pop_smallest(br)) { success = true; break; } } /* found nothing to steal ? */ if (!success) break; } /* process local work queue */ g_state->thread_workStack[threadIndex].push(br); while (g_state->thread_workStack[threadIndex].pop_largest(br)) recurseSAH(0,br,RECURSE,threadIndex,threadCount); } }
void BVH4BuilderFast::build_sequential(size_t threadIndex, size_t threadCount) { /* start measurement */ double t0 = 0.0f; if (g_verbose >= 2) t0 = getSeconds(); /* initialize node and leaf allocator */ nodeAllocator.reset(); primAllocator.reset(); __aligned(64) Allocator nodeAlloc(nodeAllocator); __aligned(64) Allocator leafAlloc(primAllocator); /* create prim refs */ global_bounds.reset(); computePrimRefs(0,1); bvh->bounds = global_bounds.geometry; /* create initial build record */ BuildRecord br; br.init(global_bounds,0,numPrimitives); br.depth = 1; br.parentNode = (size_t)&bvh->root; /* build BVH in single thread */ recurseSAH(br,nodeAlloc,leafAlloc,RECURSE_SEQUENTIAL,threadIndex,threadCount); /* stop measurement */ if (g_verbose >= 2) dt = getSeconds()-t0; }
void BVH4BuilderTopLevel::build_toplevel(size_t threadIndex, size_t threadCount) { /* calculate scene bounds */ Centroid_Scene_AABB bounds; bounds.reset(); for (size_t i=0; i<threadCount; i++) bounds.extend(g_state->thread_bounds[i]); /* ignore empty scenes */ //bvh->clear(); bvh->bounds = bounds.geometry; refs.resize(nextRef); if (refs.size() == 0) return; double t0 = 0.0; if (g_verbose >= 2) { std::cout << "building BVH4<" << bvh->primTy.name << "> with toplevel SAH builder ... " << std::flush; t0 = getSeconds(); } /* open all large nodes */ #if 0 open_sequential(); refs1.resize(refs.size()); #else global_dest = refs.size(); size_t M = max(size_t(2*global_dest),size_t(MIN_OPEN_SIZE)); refs .resize(M); refs1.resize(M); barrier.init(threadCount); TaskScheduler::executeTask(threadIndex,threadCount,_task_open_parallel,this,threadCount,"toplevel_open_parallel"); refs.resize(global_dest); #endif bvh->init(refs.size()); /* start toplevel build */ BuildRecord task; task.init(bounds,0,refs.size()); task.parentNode = (size_t)&bvh->root; task.depth = 1; /* initialize thread-local work stacks */ for (size_t i=0; i<threadCount; i++) g_state->thread_workStack[i].reset(); /* push initial build record to global work stack */ g_state->global_workStack.reset(); g_state->global_workStack.push_nolock(task); /* work in multithreaded toplevel mode until sufficient subtasks got generated */ while (g_state->global_workStack.size() < 4*threadCount && g_state->global_workStack.size()+BVH4::N <= SIZE_WORK_STACK) { BuildRecord br; if (!g_state->global_workStack.pop_nolock_largest(br)) break; recurseSAH(0,br,BUILD_TOP_LEVEL,threadIndex,threadCount); } /* now process all created subtasks on multiple threads */ TaskScheduler::executeTask(threadIndex,threadCount,_task_build_subtrees,this,threadCount,"toplevel_build_subtrees"); if (g_verbose >= 2) { double t1 = getSeconds(); std::cout << "[DONE]" << std::endl; std::cout << " dt = " << 1000.0f*(t1-t0) << "ms" << std::endl; std::cout << 
BVH4Statistics(bvh).str(); } }
void BVH4BuilderFast::build_parallel(size_t threadIndex, size_t threadCount, size_t taskIndex, size_t taskCount, TaskScheduler::Event* event) { /* wait for all threads to enter */ g_state->barrier.wait(threadIndex,threadCount); /* start measurement */ double t0 = 0.0f; if (g_verbose >= 2) t0 = getSeconds(); /* all worker threads enter tasking system */ if (threadIndex != 0) { g_state->scheduler.dispatchTaskMainLoop(threadIndex,threadCount); return; } /* calculate list of primrefs */ global_bounds.reset(); g_state->scheduler.dispatchTask( task_computePrimRefs, this, threadIndex, threadCount ); bvh->bounds = global_bounds.geometry; /* initialize node and leaf allocator */ nodeAllocator.reset(); primAllocator.reset(); __aligned(64) Allocator nodeAlloc(nodeAllocator); __aligned(64) Allocator leafAlloc(primAllocator); /* create initial build record */ BuildRecord br; br.init(global_bounds,0,numPrimitives); br.depth = 1; br.parentNode = (size_t)&bvh->root; /* initialize thread-local work stacks */ for (size_t i=0; i<threadCount; i++) g_state->threadStack[i].reset(); /* push initial build record to global work stack */ g_state->workStack.reset(); g_state->workStack.push_nolock(br); /* work in multithreaded toplevel mode until sufficient subtasks got generated */ while (g_state->workStack.size() < 4*threadCount && g_state->workStack.size()+BVH4::N <= SIZE_WORK_STACK) { BuildRecord br; /* pop largest item for better load balancing */ if (!g_state->workStack.pop_nolock_largest(br)) break; /* guarantees to create no leaves in this stage */ if (br.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) break; recurseSAH(br,nodeAlloc,leafAlloc,BUILD_TOP_LEVEL,threadIndex,threadCount); } /* now process all created subtasks on multiple threads */ g_state->scheduler.dispatchTask(task_buildSubTrees, this, threadIndex, threadCount ); /* release all threads again */ g_state->scheduler.releaseThreads(threadCount); /* stop measurement */ if (g_verbose >= 2) dt = getSeconds()-t0; }