void BVH4BuilderTopLevel::recurseSAH(size_t depth, BuildRecord& task, const size_t mode, const size_t threadID, const size_t numThreads) { /* return leaf node */ assert(task.end-task.begin > 0); if (unlikely(task.end-task.begin == 1)) { *(NodeRef*)task.parentNode = refs[task.begin].node; return; } /* create leaf node */ if (unlikely(task.depth >= BVH4::maxBuildDepth)) { createLeaf(task,threadID,numThreads); return; } /*! initialize task list */ BuildRecord childTasks[4]; childTasks[0] = task; size_t numChildren = 1; /*! split until node is full */ do { /*! find best child to split */ float bestArea = inf; ssize_t bestChild = -1; for (size_t i=0; i<numChildren; i++) { float A = childTasks[i].sceneArea(); size_t items = childTasks[i].items(); if (items > 1 && A <= bestArea) { bestChild = i; bestArea = A; } } if (bestChild == -1) break; /*! split best child into left and right child */ __align(64) BuildRecord left, right; split(childTasks[bestChild],left,right,mode,threadID,numThreads); /* add new children left and right */ left.depth = right.depth = task.depth+1; childTasks[bestChild] = childTasks[numChildren-1]; childTasks[numChildren-1] = left; childTasks[numChildren+0] = right; numChildren++; } while (numChildren < 4); /* recurse */ BVH4::Node* node = bvh->allocNode(threadID); for (ssize_t i=numChildren-1; i>=0; i--) { childTasks[i].parentNode = (size_t)&node->child(i); recurse(depth+1,childTasks[i],mode,threadID,numThreads); node->set(i,childTasks[i].bounds.geometry); } *(NodeRef*)task.parentNode = bvh->encodeNode(node); }
/* Debug helper: recursively dumps a bvh4.triangle4v hierarchy to std::cout.
 * `node` is the subtree to print; `depth` is the number of single-space
 * indentation steps emitted in front of each nested line. */
void print_bvh4_triangle4v(BVH4::NodeRef node, size_t depth)
{
  /* emits one space per nesting level */
  auto indent = [depth] () {
    for (size_t k=0; k<depth; k++) std::cout << " ";
  };

  /* leaf: list every triangle stored in every Triangle4v block */
  if (!node.isNode())
  {
    size_t num;
    const Triangle4v* tri = (const Triangle4v*) node.leaf(num);

    std::cout << "Leaf {" << std::endl;
    for (size_t i=0; i<num; i++)
    {
      const Triangle4v& block = tri[i];
      for (size_t j=0; j<block.size(); j++)
      {
        indent();
        std::cout << " Triangle { v0 = (";
        std::cout << block.v0.x[j] << ", " << block.v0.y[j] << ", " << block.v0.z[j];
        std::cout << "), " "v1 = (";
        std::cout << block.v1.x[j] << ", " << block.v1.y[j] << ", " << block.v1.z[j];
        std::cout << "), " "v2 = (";
        std::cout << block.v2.x[j] << ", " << block.v2.y[j] << ", " << block.v2.z[j];
        std::cout << "), " "geomID = " << block.geomID(j);
        std::cout << ", primID = " << block.primID(j) << " }" << std::endl;
      }
    }
    indent();
    std::cout << "}" << std::endl;
    return;
  }

  /* inner node: first all child bounds, then every non-empty child subtree */
  BVH4::Node* n = node.node();
  std::cout << "Node {" << std::endl;

  for (size_t i=0; i<BVH4::N; i++)
  {
    indent();
    std::cout << " bounds" << i << " = " << n->bounds(i) << std::endl;
  }

  for (size_t i=0; i<BVH4::N; i++)
  {
    if (n->child(i) == BVH4::emptyNode)
      continue;

    indent();
    std::cout << " child" << i << " = ";
    print_bvh4_triangle4v(n->child(i),depth+1);
  }

  indent();
  std::cout << "}" << std::endl;
}
void BVH4BuilderTopLevel::open_sequential() { size_t N = max(2*refs.size(),size_t(MIN_OPEN_SIZE)); refs.reserve(N); std::make_heap(refs.begin(),refs.end()); while (refs.size()+3 <= N) { std::pop_heap (refs.begin(),refs.end()); BVH4::NodeRef ref = refs.back().node; if (ref.isLeaf()) break; refs.pop_back(); BVH4::Node* node = ref.node(); for (size_t i=0; i<4; i++) { if (node->child(i) == BVH4::emptyNode) continue; refs.push_back(BuildRef(node->bounds(i),node->child(i))); std::push_heap (refs.begin(),refs.end()); } } }
void BVH4BuilderTopLevel::createLeaf(BuildRecord& current, size_t threadIndex, size_t threadCount) { #if defined(DEBUG) if (current.depth > BVH4::maxBuildDepthLeaf) throw std::runtime_error("ERROR: depth limit reached"); #endif /* return empty node */ if (current.end-current.begin == 0) { *(NodeRef*)current.parentNode = BVH4::emptyNode; return; } /* return leaf node */ if (current.end-current.begin == 1) { *(NodeRef*)current.parentNode = refs[current.begin].node; return; } /* first split level */ BuildRecord record0, record1; split_fallback2(&refs[0],current,record0,record1); /* second split level */ BuildRecord children[4]; split_fallback2(&refs[0],record0,children[0],children[1]); split_fallback2(&refs[0],record1,children[2],children[3]); /* allocate next four nodes */ BVH4::Node* node = bvh->allocNode(threadIndex); *(NodeRef*)current.parentNode = bvh->encodeNode(node); /* recurse into each child */ for (size_t i=0; i<4; i++) { children[i].parentNode = (size_t)&node->child(i); children[i].depth = current.depth+1; createLeaf(children[i],threadIndex,threadCount); node->set(i,children[i].bounds.geometry); } BVH4::compact(node); // move empty nodes to the end }
/*! Builds the two-level hierarchy: one acceleration structure per
 *  triangle mesh of the scene, plus a top-level BVH4 over the roots of
 *  those per-mesh structures.
 *
 *  Steps, in order:
 *   1. delete objects/builders with an index >= scene->size()
 *   2. reset the allocator; return early for a scene without primitives
 *   3. create missing per-mesh structures, then (re)build them in parallel
 *      and collect one BuildRef per non-empty object
 *   4. fast paths when only a single reference exists
 *   5. open_sequential(), conversion to PrimRefs and the top-level build
 *
 *  \param threadIndex not referenced in this function body
 *  \param threadCount not referenced in this function body
 *
 *  NOTE(review): when PROFILE is enabled most of the body is wrapped in a
 *  lambda passed to profile(), so the `return` statements of the fast
 *  paths only leave that lambda. Without PROFILE they leave build() itself
 *  and bvh->alloc.cleanup() / bvh->postBuild(t0) are not reached on those
 *  paths - confirm this is intended.
 */
void BVH4BuilderTwoLevel::build(size_t threadIndex, size_t threadCount)
{
  /* delete objects and builders whose index is beyond the current scene size */
  size_t N = scene->size();
  if (N < objects.size()) {
    parallel_for(N, objects.size(), [&] (const range<size_t>& r) {
      for (size_t i=r.begin(); i<r.end(); i++) {
        delete builders[i]; builders[i] = nullptr;
        delete objects[i]; objects[i] = nullptr;
      }
    });
  }

  /* reset memory allocator */
  bvh->alloc.reset();

  /* skip build for empty scene (happens before preBuild, so no postBuild is due) */
  const size_t numPrimitives = scene->getNumPrimitives<TriangleMesh,1>();
  if (numPrimitives == 0) {
    prims.resize(0);
    bvh->set(BVH4::emptyNode,empty,0);
    return;
  }

  double t0 = bvh->preBuild(TOSTRING(isa) "::BVH4BuilderTwoLevel");

  /* with PROFILE enabled everything up to the matching "});" below runs inside this lambda */
#if PROFILE
  profile(2,20,numPrimitives,[&] (ProfileTimer& timer) {
#endif

  /* resize object array if scene got larger */
  if (objects.size() < N) objects.resize(N);
  if (builders.size() < N) builders.resize(N);
  if (refs.size() < N) refs.resize(N);
  nextRef = 0;

  /* create acceleration structures for meshes that do not have one yet */
  parallel_for(size_t(0), N, [&] (const range<size_t>& r) {
    for (size_t objectID=r.begin(); objectID<r.end(); objectID++) {
      TriangleMesh* mesh = scene->getTriangleMeshSafe(objectID);

      /* verify meshes got deleted properly */
      if (mesh == nullptr || mesh->numTimeSteps != 1) {
        assert(objectID < objects.size () && objects[objectID] == nullptr);
        assert(objectID < builders.size() && builders[objectID] == nullptr);
        continue;
      }

      /* create BVH and builder for new meshes */
      if (objects[objectID] == nullptr)
        createTriangleMeshAccel(mesh,(AccelData*&)objects[objectID],builders[objectID]);
    }
  });

  /* parallel build of acceleration structures */
  parallel_for(size_t(0), N, [&] (const range<size_t>& r) {
    for (size_t objectID=r.begin(); objectID<r.end(); objectID++) {

      /* ignore if no triangle mesh or not enabled */
      TriangleMesh* mesh = scene->getTriangleMeshSafe(objectID);
      if (mesh == nullptr || !mesh->isEnabled() || mesh->numTimeSteps != 1) continue;

      BVH4* object = objects [objectID]; assert(object);
      Builder* builder = builders[objectID]; 
      assert(builder);

      /* build object if it got modified (with PROFILE enabled it is rebuilt unconditionally) */
#if !PROFILE
      if (mesh->isModified())
#endif
        builder->build(0,0);

      /* create build primitive */
      /* NOTE(review): nextRef++ is executed inside parallel_for; this presumably
       * relies on nextRef being an atomic counter - its declaration is not visible here */
      if (!object->bounds.empty())
        refs[nextRef++] = BVH4BuilderTwoLevel::BuildRef(object->bounds,object->root);
    }
  });

  /* fast path for single geometry scenes: use that object's root directly */
  if (nextRef == 1) {
    bvh->set(refs[0].node,refs[0].bounds(),numPrimitives);
    return;
  }

  /* open all large nodes; refs is first shrunk to the references actually written */
  refs.resize(nextRef);
  open_sequential(numPrimitives);

  /* fast path for small geometries: still only one reference after opening */
  if (refs.size() == 1) {
    bvh->set(refs[0].node,refs[0].bounds(),numPrimitives);
    return;
  }

  /* compute PrimRefs: one per reference, with the node reference stored as ID;
   * the reduction merges the per-range PrimInfo results */
  prims.resize(refs.size());
  const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), size_t(1024), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo {
    PrimInfo pinfo(empty);
    for (size_t i=r.begin(); i<r.end(); i++) {
      pinfo.add(refs[i].bounds());
      prims[i] = PrimRef(refs[i].bounds(),(size_t)refs[i].node);
    }
    return pinfo;
  }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });

  /* skip if all objects were empty */
  if (pinfo.size() == 0)
    bvh->set(BVH4::emptyNode,empty,0);

  /* otherwise build toplevel hierarchy */
  else {
    BVH4::NodeRef root;
    BVHBuilderBinnedSAH::build<BVH4::NodeRef>
      (root,
       /* provides the allocator object handed to the two callbacks below */
       [&] { return bvh->alloc.threadLocal2(); },
       /* inner node callback: allocates and clears a node, stores the child
        * bounds and points every child record at its slot in the node.
        * The parameter N of this lambda shadows the outer N. */
       [&] (const isa::BVHBuilderBinnedSAH::BuildRecord& current, BVHBuilderBinnedSAH::BuildRecord* children, const size_t N, FastAllocator::ThreadLocal2* alloc) -> int {
         BVH4::Node* node = (BVH4::Node*) alloc->alloc0.malloc(sizeof(BVH4::Node)); node->clear();
         for (size_t i=0; i<N; i++) {
           node->set(i,children[i].pinfo.geomBounds);
           children[i].parent = (size_t*)&node->child(i);
         }
         *current.parent = bvh->encodeNode(node);
         return 0;
       },
       /* leaf callback: a leaf holds exactly one PrimRef, whose ID is the
        * node reference stored above; it is linked in directly */
       [&] (const BVHBuilderBinnedSAH::BuildRecord& current, FastAllocator::ThreadLocal2* alloc) -> int {
         assert(current.prims.size() == 1);
         *current.parent = (BVH4::NodeRef) prims[current.prims.begin()].ID();
         return 1;
       },
       /* progress callback: dn is ignored, progressMonitor is always called with 0 */
       [&] (size_t dn) { bvh->scene->progressMonitor(0); },
       /* NOTE(review): the meaning of the trailing numeric arguments is defined by
        * BVHBuilderBinnedSAH::build, which is not visible here */
       prims.data(),pinfo,BVH4::N,BVH4::maxBuildDepthLeaf,4,1,1,1.0f,1.0f);

    bvh->set(root,pinfo.geomBounds,numPrimitives);
  }

  /* closes the profile() lambda opened above */
#if PROFILE
  });
#endif

  bvh->alloc.cleanup();
  bvh->postBuild(t0);
}
/*! Per-thread task that opens build references in parallel.
 *
 *  Each thread copies its share of \c refs into its own, larger slice of
 *  \c refs1, opens references there in barrier-synchronised rounds, and
 *  finally copies the result back into \c refs.
 *
 *  In every round all threads agree on the largest lower.w value (the
 *  comments in the code call it "volume") and each thread opens the
 *  non-leaf references of its slice whose value is at least half of it.
 *
 *  \param threadIndex index of this thread; selects the slices and is passed to the barrier
 *  \param threadCount number of participating threads
 *  \param taskIndex   not referenced in this function body
 *  \param taskCount   not referenced in this function body
 *  \param event       not referenced in this function body
 *
 *  Every thread must execute the same sequence of barrier.wait() calls, so
 *  the statement order inside the round loop is significant.
 */
void BVH4BuilderTopLevel::task_open_parallel(size_t threadIndex, size_t threadCount, size_t taskIndex, size_t taskCount, TaskScheduler::Event* event)
{
  /* N: number of input references (global_dest is read here and reused as
   * output counter at the end); M: capacity of the scratch buffer */
  size_t N = global_dest;
  size_t M = refs1.size();

  /* [start0,end0): this thread's share of refs; [start1,end1): its slice of refs1 */
  const size_t start0 = (threadIndex+0)*N/threadCount;
  const size_t end0 = (threadIndex+1)*N/threadCount;
  const size_t start1 = (threadIndex+0)*M/threadCount;
  const size_t end1 = (threadIndex+1)*M/threadCount;
  assert(end1-start1 >= end0-start0);
  BuildRef* prefs1 = &refs1[0];

  /* copy from refs buffer to refs1 buffer */
  for (size_t i=start0, j=start1; i<end0; i++, j++)
    refs1[j] = refs[i];

  /* create max heap in our set of items; [start,end) is the occupied part of the slice */
  size_t start = start1;
  size_t end = start1+end0-start0;
  std::make_heap(&prefs1[start],&prefs1[end]);
  float max_volume = 0.0f;

  while (true)
  {
    /* thread 0 resets the shared maximum; the barriers on both sides keep
     * the reset apart from the previous round's reads and this round's updates */
    barrier.wait(threadIndex,threadCount);
    if (threadIndex == 0) global_max_volume = 0.0f;
    barrier.wait(threadIndex,threadCount);

    /* parallel calculation of maximal volume; a thread whose slice cannot
     * take another opening (fewer than 3 free entries) contributes 0 */
    max_volume = 0.0f;
    if (end+3 <= end1)
      for (size_t i=start; i<end; i++)
        max_volume = max(max_volume,prefs1[i].lower.w);
    atomic_max_f32(&global_max_volume,max_volume);

    /* all contributions are in; every thread reads the same global maximum */
    barrier.wait(threadIndex,threadCount);
    max_volume = global_max_volume;
    barrier.wait(threadIndex,threadCount);

    /* if maximal volume is 0, all threads are finished */
    /* NOTE(review): termination presumably relies on leaf references carrying
     * lower.w == 0 - that is set where BuildRef is constructed, not visible here */
    if (max_volume == 0.0f) break;

    /* open all nodes that are considered large in this iteration */
    while (end+3 <= end1)
    {
      if (end-start == 0) break;

      /* move the heap top to position end-1 and inspect it */
      std::pop_heap(&prefs1[start],&prefs1[end]);
      BVH4::NodeRef ref = prefs1[end-1].node;
      float vol = prefs1[end-1].lower.w;

      /* leaf or below half of the global maximum: restore the heap and
       * leave the reference for a later round */
      if (ref.isLeaf() || vol < 0.5f*max_volume) {
        std::push_heap(&prefs1[start],&prefs1[end]);
        break;
      }

      /* drop the opened reference and insert its non-empty children */
      end--;
      BVH4::Node* node = ref.node();
      for (size_t i=0; i<4; i++) {
        if (node->child(i) == BVH4::emptyNode) continue;
        prefs1[end++] = BuildRef(node->bounds(i),node->child(i));
        std::push_heap(&prefs1[start],&prefs1[end]);
      }
    }
  }

  /* thread 0 resets the output counter; the barrier makes the reset
   * visible before any thread reserves its output range */
  if (threadIndex == 0) global_dest = 0;
  barrier.wait(threadIndex,threadCount);

  /* copy again back to refs array */
  /* dest is used as the first output index of this thread; this presumably
   * relies on atomic_add returning the counter value before the addition - confirm */
  size_t dest = atomic_add(&global_dest,end-start);
  for (size_t i=start, j=dest; i<end; i++, j++)
    refs[j] = refs1[i];
}