/*! Refreshes `num` Triangle8 blocks stored at `prim` from the current mesh
 *  vertices and returns the box enclosing every vertex that was read.
 *
 *  \param prim  storage holding the Triangle8 blocks (rewritten in place)
 *  \param num   number of Triangle8 blocks to process
 *  \param geom  cast to Scene*; meshes are looked up through it by geomID
 *  \return      merged bounds of all triangles visited */
BBox3fa SceneTriangle8::update(char* prim, size_t num, void* geom) const
{
  Scene* const scene = static_cast<Scene*>(geom);
  Triangle8* const blocks = reinterpret_cast<Triangle8*>(prim);
  BBox3fa result = empty;

  for (size_t blockIdx = 0; blockIdx < num; ++blockIdx)
  {
    Triangle8& block = blocks[blockIdx];

    /* lanes that are not filled below keep these defaults */
    avxi laneGeomID = -1, lanePrimID = -1, laneMask = -1;
    avx3f a = zero, b = zero, c = zero;

    for (size_t lane = 0; lane < 8; ++lane)
    {
      /* a primID of -1 ends the list of used lanes in this block */
      if (block.primID[lane] == -1)
        break;

      /* IDs are read from the old block before it gets overwritten */
      const unsigned geomID = block.geomID[lane];
      const unsigned primID = block.primID[lane];

      const TriangleMesh* mesh = scene->getTriangleMesh(geomID);
      const TriangleMesh::Triangle& tri = mesh->triangle(primID);
      const Vec3fa p0 = mesh->vertex(tri.v[0]);
      const Vec3fa p1 = mesh->vertex(tri.v[1]);
      const Vec3fa p2 = mesh->vertex(tri.v[2]);

      result.extend(merge(BBox3fa(p0),BBox3fa(p1),BBox3fa(p2)));

      laneGeomID[lane] = geomID;
      lanePrimID[lane] = primID;
      laneMask  [lane] = mesh->mask;

      a.x[lane] = p0.x;  a.y[lane] = p0.y;  a.z[lane] = p0.z;
      b.x[lane] = p1.x;  b.y[lane] = p1.y;  b.z[lane] = p1.z;
      c.x[lane] = p2.x;  c.y[lane] = p2.y;  c.z[lane] = p2.z;
    }

    /* rebuild the block in its existing storage */
    new (&block) Triangle8(a,b,c,laneGeomID,lanePrimID,laneMask);
  }

  return result;
}
/*! Sets up parallel primitive-reference generation and runs it.
 *
 *  Steps performed, in order:
 *   1. sum primitive and vertex counts over all groups of `geom`,
 *   2. build approximate geometry/centroid bounds from a strided sample of
 *      the primitives and construct `pinfo` from them,
 *   3. assign every task a start group, start primitive and primitive count,
 *   4. execute the generation tasks followed by the reduction callback.
 *
 *  \param threadIndex  forwarded to the task scheduler
 *  \param threadCount  forwarded to the task scheduler
 *  \param geom         source of groups, primitive counts and bounds
 *  \param alloc        block allocator stored for use by the tasks */
PrimRefGen<Heuristic>::PrimRefGen(size_t threadIndex, size_t threadCount, const BuildSource* geom, PrimRefAlloc* alloc)
  : geom(geom), numPrimitives(0), numVertices(0), alloc(alloc)
{
  const size_t groupCount = geom->groups();

  /* total number of primitives and vertices */
  for (size_t group = 0; group < groupCount; ++group)
  {
    size_t groupVertices = 0;
    numPrimitives += geom->prims(group,&groupVertices);
    numVertices += groupVertices;
  }

  /* approximate bounds: visit every stride-th primitive of the concatenated
     primitive sequence (stride is at least 1) */
  BBox3fa sampledGeomBounds = empty, sampledCentBounds = empty;
  const size_t stride = max(size_t(1),numPrimitives/2048);
  size_t primsBefore = 0;   /* primitives contained in all earlier groups */
  size_t nextSample  = 0;   /* global index of the next primitive to sample */
  for (size_t group = 0; group < groupCount; ++group)
  {
    const size_t groupPrims = geom->prims(group);
    size_t local = nextSample - primsBefore;
    while (local < groupPrims)
    {
      const BBox3fa box = geom->bounds(group,local);
      sampledGeomBounds.extend(box);
      sampledCentBounds.extend(center2(box));
      local      += stride;
      nextSample += stride;
    }
    primsBefore += groupPrims;
  }
  new (&pinfo) PrimInfo(numPrimitives,sampledGeomBounds,sampledCentBounds);

  /* static work distribution: task k receives the global primitive range
     [k*numPrimitives/numTasks, (k+1)*numPrimitives/numTasks) */
  size_t curGroup = 0, curPrim = 0;
  for (size_t task = 0; task < numTasks; ++task)
  {
    const size_t rangeBegin = (task+0)*numPrimitives/numTasks;
    const size_t rangeEnd   = (task+1)*numPrimitives/numTasks;
    size_t remaining = rangeEnd - rangeBegin;

    work[task].startGroup = curGroup;
    work[task].startPrim  = curPrim;
    work[task].numPrims   = remaining;

    /* advance the (group, primitive) cursor past this task's range */
    while (curGroup < groupCount)
    {
      const size_t available = geom->prims(curGroup) - curPrim;
      if (remaining < available) {
        curPrim += remaining;
        break;
      }
      remaining -= available;
      curPrim = 0;
      ++curGroup;
    }
  }

  /* start parallel task */
  TaskScheduler::executeTask(threadIndex,threadCount,
                             _task_gen_parallel,this,numTasks,
                             _task_gen_parallel_reduce,this,
                             "build::primrefgen");
}
/*! Scans all bin boundaries in the three dimensions and returns the split
 *  with the lowest SAH value.
 *
 *  \param pinfo         not referenced by this function body
 *  \param mapping       used to skip invalid dimensions; stored in the result
 *  \param blocks_shift  primitive counts are rounded up to multiples of
 *                       (1 << blocks_shift) and shifted right by it
 *  \return  the best split, or Split(inf,-1,0,mapping) when no dimension
 *           yields a usable position */
__forceinline SpatialSplit::Split SpatialSplit::BinInfo::best(const PrimInfo& pinfo, const Mapping& mapping, const size_t blocks_shift)
{
  /* right-to-left pass: for every boundary store the accumulated count and
     half area of all bins at or to the right of it */
  ssef rightArea[BINS];
  ssei rightCount[BINS];
  {
    ssei running = 0;
    BBox3fa boxX = empty;
    BBox3fa boxY = empty;
    BBox3fa boxZ = empty;
    for (size_t bin = BINS-1; bin > 0; bin--)
    {
      running += numEnd[bin];
      rightCount[bin] = running;

      boxX.extend(bounds[bin][0]);
      boxY.extend(bounds[bin][1]);
      boxZ.extend(bounds[bin][2]);
      rightArea[bin][0] = halfArea(boxX);
      rightArea[bin][1] = halfArea(boxY);
      rightArea[bin][2] = halfArea(boxZ);
    }
  }

  /* left-to-right pass: accumulate the left side and keep, per dimension,
     the position with the smallest SAH seen so far */
  ssei roundUp = (1 << blocks_shift)-1;
  ssef bestSAHPerDim = pos_inf;
  ssei bestPosPerDim = 0;
  {
    ssei running = 0;
    ssei candidate = 1;
    BBox3fa boxX = empty;
    BBox3fa boxY = empty;
    BBox3fa boxZ = empty;
    for (size_t bin = 1; bin < BINS; bin++, candidate += 1)
    {
      running += numBegin[bin-1];
      boxX.extend(bounds[bin-1][0]); float areaX = halfArea(boxX);
      boxY.extend(bounds[bin-1][1]); float areaY = halfArea(boxY);
      boxZ.extend(bounds[bin-1][2]); float areaZ = halfArea(boxZ);

      const ssef leftArea   = ssef(areaX,areaY,areaZ,areaZ);
      const ssef rightAreaI = rightArea[bin];
      const ssei leftBlocks  = (running        +roundUp) >> blocks_shift;
      const ssei rightBlocks = (rightCount[bin]+roundUp) >> blocks_shift;
      const ssef cost = leftArea*ssef(leftBlocks) + rightAreaI*ssef(rightBlocks);

      /* position must be selected before the best cost is overwritten */
      bestPosPerDim = select(cost < bestSAHPerDim,candidate,bestPosPerDim);
      bestSAHPerDim = select(cost < bestSAHPerDim,cost     ,bestSAHPerDim);
    }
  }

  /* pick the cheapest dimension */
  float winnerSAH = inf;
  int   winnerDim = -1;
  int   winnerPos = 0;
  for (size_t dim = 0; dim < 3; dim++)
  {
    /* ignore zero sized dimensions */
    if (unlikely(mapping.invalid(dim)))
      continue;

    /* position 0 means no candidate was ever selected for this dimension */
    const bool improves = bestSAHPerDim[dim] < winnerSAH && bestPosPerDim[dim] != 0;
    if (!improves)
      continue;

    winnerDim = dim;
    winnerPos = bestPosPerDim[dim];
    winnerSAH = bestSAHPerDim[dim];
  }

  if (winnerDim != -1)
    return Split(winnerSAH,winnerDim,winnerPos,mapping);

  /* no split found */
  return Split(inf,-1,0,mapping);
}
/*! Rebuilds `num` Triangle1v primitives stored at `prim` from the current
 *  vertices of the mesh and returns the box enclosing all of them.
 *
 *  \param prim  storage holding the Triangle1v array (rewritten in place)
 *  \param num   number of primitives in the array
 *  \param geom  cast to const TriangleMesh*
 *  \return      merged bounds of all rebuilt triangles */
BBox3fa TriangleMeshTriangle1v::update(char* prim, size_t num, void* geom) const
{
  const TriangleMesh* mesh = static_cast<const TriangleMesh*>(geom);
  Triangle1v* const triangles = reinterpret_cast<Triangle1v*>(prim);

  BBox3fa result = empty;
  for (Triangle1v* cur = triangles; cur != triangles + num; ++cur)
  {
    /* IDs are taken from the old primitive before it is overwritten */
    const unsigned geomID = cur->geomID();
    const unsigned primID = cur->primID();

    const TriangleMesh::Triangle& indices = mesh->triangle(primID);
    const Vec3fa a = mesh->vertex(indices.v[0]);
    const Vec3fa b = mesh->vertex(indices.v[1]);
    const Vec3fa c = mesh->vertex(indices.v[2]);

    new (cur) Triangle1v(a,b,c,geomID,primID,mesh->mask);
    result.extend(merge(BBox3fa(a),BBox3fa(b),BBox3fa(c)));
  }
  return result;
}
/*! Sequential search for a two-axis strand split of the curves in `prims`.
 *
 *  The first axis is the normalized p0->p3 direction of the first curve. The
 *  second axis is the curve direction with the smallest absolute cosine
 *  against the first axis. Every curve is then assigned to the axis it is
 *  more aligned with, and its bounds are accumulated in that axis' frame.
 *
 *  \param threadIndex, threadCount, scheduler  not referenced by this body
 *  \param prims  list of curves to partition
 *  \return  Split(inf,axis0,axis1) if one side stays empty, otherwise the
 *           split carrying the SAH value of the partition */
const StrandSplit::Split StrandSplit::find<false>(size_t threadIndex, size_t threadCount, LockStepTaskScheduler* scheduler, BezierRefList& prims)
{
  /* first curve determines first axis */
  BezierRefList::block_iterator_unsafe scan = prims;
  Vec3fa axis0 = normalize(scan->p3 - scan->p0);

  /* find 2nd axis that is most misaligned with first axis */
  Vec3fa axis1 = axis0;
  float smallestCos = 1.0f;
  for (; scan; scan++)
  {
    Vec3fa dir = scan->p3 - scan->p0;
    const float dirLen = length(dir);
    if (dirLen == 0.0f)
      continue;               /* degenerate curve: no direction to compare */
    dir /= dirLen;

    const float alignment = abs(dot(dir,axis0));
    if (alignment < smallestCos) {
      smallestCos = alignment;
      axis1 = dir;
    }
  }

  /* partition the two strands */
  const LinearSpace3fa frame0 = frame(axis0).transposed();
  const LinearSpace3fa frame1 = frame(axis1).transposed();
  size_t leftCount = 0, rightCount = 0;
  BBox3fa leftBox = empty, rightBox = empty;
  for (BezierRefList::block_iterator_unsafe it = prims; it; it++)
  {
    BezierPrim& curve = *it;
    const Vec3fa dir = normalize(curve.p3-curve.p0);
    const float toAxis0 = abs(dot(dir,axis0));
    const float toAxis1 = abs(dot(dir,axis1));

    /* ties go to the right side */
    if (toAxis0 > toAxis1) {
      leftCount++;
      leftBox.extend(curve.bounds(frame0));
    } else {
      rightCount++;
      rightBox.extend(curve.bounds(frame1));
    }
  }

  /*! return an invalid split if we do not partition */
  if (leftCount == 0 || rightCount == 0)
    return Split(inf,axis0,axis1);

  /*! calculate sah for the split */
  const float cost = float(leftCount)*halfArea(leftBox) + float(rightCount)*halfArea(rightBox);
  return Split(cost,axis0,axis1);
}
/*! Rebuilds Triangle1v primitives stored at `prim_i` from the current mesh
 *  vertices and returns the box enclosing all of them.
 *
 *  Two modes are supported:
 *   - `num == size_t(-1)`: primitives are processed until one reports
 *     last(); IDs are read via geomID<1>() / primID<1>() and the last flag
 *     is written back into the rebuilt primitive.
 *   - otherwise: exactly `num` primitives are processed; IDs are read via
 *     geomID<0>() / primID<0>() and the last flag is written as false.
 *
 *  \param prim_i  storage holding the Triangle1v primitives (rewritten in place)
 *  \param num     primitive count, or size_t(-1) for the list-terminated mode
 *  \param geom    cast to const TriangleMesh*
 *  \return        merged bounds of all rebuilt triangles */
BBox3fa TriangleMeshTriangle1v::update(char* prim_i, size_t num, void* geom) const
{
  const TriangleMesh* mesh = static_cast<const TriangleMesh*>(geom);
  Triangle1v* cur = reinterpret_cast<Triangle1v*>(prim_i);
  BBox3fa result = empty;

  if (num != size_t(-1))
  {
    /* counted mode */
    for (size_t done = 0; done < num; ++done, ++cur)
    {
      const unsigned geomID = cur->geomID<0>();
      const unsigned primID = cur->primID<0>();
      const TriangleMesh::Triangle& indices = mesh->triangle(primID);
      const Vec3fa a = mesh->vertex(indices.v[0]);
      const Vec3fa b = mesh->vertex(indices.v[1]);
      const Vec3fa c = mesh->vertex(indices.v[2]);
      new (cur) Triangle1v(a,b,c,geomID,primID,mesh->mask,false);
      result.extend(merge(BBox3fa(a),BBox3fa(b),BBox3fa(c)));
    }
    return result;
  }

  /* list-terminated mode: at least one primitive is always processed */
  bool isLast;
  do
  {
    const unsigned geomID = cur->geomID<1>();
    const unsigned primID = cur->primID<1>();
    const TriangleMesh::Triangle& indices = mesh->triangle(primID);
    const Vec3fa a = mesh->vertex(indices.v[0]);
    const Vec3fa b = mesh->vertex(indices.v[1]);
    const Vec3fa c = mesh->vertex(indices.v[2]);

    /* the flag has to be read before the primitive is overwritten */
    isLast = cur->last();
    new (cur) Triangle1v(a,b,c,geomID,primID,mesh->mask,isLast);
    result.extend(merge(BBox3fa(a),BBox3fa(b),BBox3fa(c)));

    if (!isLast)
      cur++;
  } while (!isLast);

  return result;
}
/*! Task body: generates PrimRefs for the primitive range assigned to
 *  `taskIndex` in `work[]`, stores them into freshly allocated blocks, and
 *  feeds every completed block to this task's heuristic.
 *
 *  Primitives whose bounds are empty are skipped. On exit the per-task
 *  geometry/centroid bounds are stored and `work[taskIndex].numPrims` is
 *  replaced by the number of primitives that were actually added.
 *
 *  \param threadIndex  passed to the block allocator
 *  \param taskIndex    selects the work item, heuristic and result slots
 *  \param threadCount, taskCount, event  not referenced by this body */
void PrimRefGen<Heuristic>::task_gen_parallel(size_t threadIndex, size_t threadCount, size_t taskIndex, size_t taskCount, TaskScheduler::Event* event)
{
  /* construct this task's heuristic in its preallocated slot */
  Heuristic& binner = heuristics[taskIndex];
  new (&binner) Heuristic(pinfo,geom);

  /* static work allocation */
  size_t group      = work[taskIndex].startGroup;
  size_t primInGrp  = work[taskIndex].startPrim;
  const size_t todo = work[taskIndex].numPrims;
  /* the group size is only queried when there is work to do */
  size_t groupSize  = todo ? geom->prims(group) : 0;

  size_t added = 0;
  BBox3fa taskGeomBounds = empty, taskCentBounds = empty;
  typename atomic_set<PrimRefBlock>::item* block = prims.insert(alloc->malloc(threadIndex));

  for (size_t n = 0; n < todo; n++, primInGrp++)
  {
    /* goto next group (the loop also steps over groups without primitives) */
    while (primInGrp == groupSize) {
      group++;
      primInGrp = 0;
      groupSize = geom->prims(group);
    }

    const BBox3fa box = geom->bounds(group,primInGrp);
    if (box.empty())
      continue;

    added++;
    taskGeomBounds.extend(box);
    taskCentBounds.extend(center2(box));

    const PrimRef ref = PrimRef(box,group,primInGrp);
    if (likely(block->insert(ref)))
      continue;

    /* insertion failed: bin the current block, then retry in a new one */
    binner.bin(block->base(),block->size());
    block = prims.insert(alloc->malloc(threadIndex));
    block->insert(ref);
  }

  /* bin whatever is in the last block */
  binner.bin(block->base(),block->size());

  geomBounds[taskIndex] = taskGeomBounds;
  centBounds[taskIndex] = taskCentBounds;
  work[taskIndex].numPrims = added;
}
void StrandSplit::TaskFindParallel::task_bound_parallel(size_t threadIndex, size_t threadCount, size_t taskIndex, size_t taskCount) { const LinearSpace3fa space0 = frame(axis0).transposed(); const LinearSpace3fa space1 = frame(axis1).transposed(); size_t lnum = 0; BBox3fa lbounds = empty; size_t rnum = 0; BBox3fa rbounds = empty; while (BezierRefList::item* block = iter1.next()) { for (size_t i=0; i<block->size(); i++) { BezierPrim& prim = block->at(i); const Vec3fa axisi = normalize(prim.p3-prim.p0); const float cos0 = abs(dot(axisi,axis0)); const float cos1 = abs(dot(axisi,axis1)); if (cos0 > cos1) { lnum++; lbounds.extend(prim.bounds(space0)); } else { rnum++; rbounds.extend(prim.bounds(space1)); } } } task_lnum[taskIndex] = lnum; task_lbounds[taskIndex] = lbounds; task_rnum[taskIndex] = rnum; task_rbounds[taskIndex] = rbounds; }
/*! Parallel search for a two-axis strand split; the result is left in `split`.
 *
 *  The first axis is the normalized p0->p3 direction of the first curve.
 *  Two task dispatches follow: after the first, the per-task candidates in
 *  task_cos/task_axis1 are reduced to the second axis; after the second, the
 *  per-task counts and bounds are reduced and the SAH value is computed.
 *
 *  \param threadIndex  forwarded to the scheduler
 *  \param threadCount  upper limit (together with maxTasks) for the task count
 *  \param scheduler    used to dispatch both task rounds
 *  \param prims        list of curves to partition */
StrandSplit::TaskFindParallel::TaskFindParallel(size_t threadIndex, size_t threadCount, LockStepTaskScheduler* scheduler, BezierRefList& prims)
{
  /* first curve determines first axis */
  BezierRefList::block_iterator_unsafe i = prims;
  axis0 = axis1 = normalize(i->p3 - i->p0);

  /* parallel calculation of 2nd axis */
  /* NOTE(review): this dispatch runs _task_bound_parallel, yet the reduction
     below reads task_cos/task_axis1, which the task_bound_parallel body in
     this file does not write. It presumably should run a dedicated
     axis-search task -- confirm against the class declaration. */
  size_t numTasks = min(maxTasks,threadCount);
  scheduler->dispatchTask(threadIndex,numTasks,_task_bound_parallel,this,numTasks,"build::task_find_parallel");

  /* select best 2nd axis */
  float bestCos = 1.0f;
  for (size_t i=0; i<numTasks; i++) {
    if (task_cos[i] < bestCos) {
      bestCos = task_cos[i];
      axis1 = task_axis1[i];
    }
  }

  /* parallel calculation of unaligned bounds */
  scheduler->dispatchTask(threadIndex,numTasks,_task_bound_parallel,this,numTasks,"build::task_find_parallel");

  /* reduce bounds calculated by tasks */
  size_t lnum = 0; BBox3fa lbounds = empty;
  size_t rnum = 0; BBox3fa rbounds = empty;
  for (size_t i=0; i<numTasks; i++) {
    lnum += task_lnum[i]; lbounds.extend(task_lbounds[i]);
    rnum += task_rnum[i]; rbounds.extend(task_rbounds[i]);
  }

  /*! return an invalid split if we do not partition */
  if (lnum == 0 || rnum == 0) {
    split = Split(inf,axis0,axis1);
    return;
  }

  /*! calculate sah for the split: each side's area is weighted by that
      side's own primitive count (the right term previously used lnum) */
  const float sah = float(lnum)*halfArea(lbounds) + float(rnum)*halfArea(rbounds);
  split = Split(sah,axis0,axis1);
}
/*! Recursively sums a surface-area cost over the subtree rooted at `node`.
 *
 *  An inner node contributes the area of `bounds` plus the cost of its four
 *  children; a leaf contributes the area of `bounds` times the item count
 *  reported by leaf(). Empty bounds count as area 0.
 *
 *  \param node    subtree root
 *  \param bounds  bounding box associated with `node`
 *  \return        accumulated cost of the subtree */
float BVH4i::sah (NodeRef& node, const BBox3fa& bounds)
{
  const float ownArea = bounds.empty() ? 0.0f : area(bounds);

  /* leaf: area weighted by the number of items */
  if (!node.isNode())
  {
    size_t items;
    node.leaf(triPtr(),items);
    return ownArea*items;
  }

  /* inner node: own area plus all four children */
  Node* inner = node.node(nodePtr());
  float total = ownArea;
  for (size_t slot = 0; slot < 4; ++slot)
    total += sah(inner->child(slot),inner->bounds(slot));
  return total;
}
/*! Recursively sums a surface-area cost over the subtree rooted at `node`.
 *
 *  An inner node contributes the area of `bounds` plus the cost of every
 *  child that is not BVH4i::invalidNode; a leaf contributes the area of
 *  `bounds` times the item count reported by leaf(). Empty bounds count as
 *  area 0.
 *
 *  \param node    subtree root
 *  \param bounds  bounding box associated with `node`
 *  \return        accumulated cost of the subtree */
float BVH4i::sah (NodeRef& node, BBox3fa bounds)
{
  const float ownArea = bounds.empty() ? 0.0f : area(bounds);

  /* leaf: area weighted by the number of items */
  if (!node.isNode())
  {
    unsigned int items;
    node.leaf(triPtr(),items);
    return ownArea*items;
  }

  /* inner node: own area plus all valid children */
  Node* inner = node.node(nodePtr());
  float total = ownArea;
  for (size_t slot = 0; slot < BVH4i::N; ++slot)
  {
    if (inner->child(slot) != BVH4i::invalidNode)
      total += sah(inner->child(slot),inner->bounds(slot));
  }
  return total;
}
/*! Recursively gathers statistics for the subtree rooted at `node`.
 *
 *  Inner nodes increment numNodes and add area * BVH4::travCost to bvhSAH.
 *  Non-empty leaves increment numLeaves, numPrimBlocks and numPrims, and add
 *  area * primTy.intCost * blockCount to both bvhSAH and leafSAH.
 *
 *  \param node    subtree root
 *  \param bounds  bounding box associated with `node` (empty counts as area 0)
 *  \param depth   output: 0 for a leaf, 1 + deepest child for an inner node
 *  \throws std::runtime_error("invalid node") if a non-empty child follows
 *          an empty child slot */
void BVH4Statistics::statistics(NodeRef node, const BBox3fa& bounds, size_t& depth)
{
  const float surface = bounds.empty() ? 0.0f : area(bounds);

  /* ---- leaf ---- */
  if (!node.isNode())
  {
    depth = 0;
    size_t blockCount;
    const char* leafData = node.leaf(blockCount);
    if (blockCount == 0)
      return;                      /* empty leaves are not counted */

    numLeaves++;
    numPrimBlocks += blockCount;
    for (size_t b = 0; b < blockCount; ++b)
      numPrims += bvh->primTy.size(leafData + b*bvh->primTy.bytes);

    const float leafCost = surface * bvh->primTy.intCost * blockCount;
    bvhSAH  += leafCost;
    leafSAH += leafCost;
    return;
  }

  /* ---- inner node ---- */
  numNodes++;
  depth = 0;
  Node* inner = node.node();

  /* the node's own cost is accumulated before descending */
  bvhSAH += surface*BVH4::travCost;

  size_t childDepth = 0;
  for (size_t slot = 0; slot < BVH4::N; ++slot)
  {
    statistics(inner->child(slot),inner->bounds(slot),childDepth);
    depth = max(depth,childDepth);
  }

  /* once an empty slot was seen, every later slot has to be empty as well */
  bool sawEmpty = false;
  for (size_t slot = 0; slot < BVH4::N; ++slot)
  {
    const bool slotEmpty = (inner->child(slot) == BVH4::emptyNode);
    if (sawEmpty && !slotEmpty)
      throw std::runtime_error("invalid node");
    sawEmpty = sawEmpty || slotEmpty;
  }

  depth++;
}
void build_morton(vector_t<PrimRef>& prims, isa::PrimInfo& pinfo) { size_t N = pinfo.size(); /* array for morton builder */ vector_t<isa::MortonID32Bit> morton_src(N); vector_t<isa::MortonID32Bit> morton_tmp(N); for (size_t i=0; i<N; i++) morton_src[i].index = i; /* fast allocator that supports thread local operation */ FastAllocator allocator; for (size_t i=0; i<2; i++) { std::cout << "iteration " << i << ": building BVH over " << N << " primitives, " << std::flush; double t0 = getSeconds(); allocator.reset(); std::pair<Node*,BBox3fa> node_bounds = isa::bvh_builder_morton<Node*>( /* thread local allocator for fast allocations */ [&] () -> FastAllocator::ThreadLocal* { return allocator.threadLocal(); }, BBox3fa(empty), /* lambda function that allocates BVH nodes */ [&] ( isa::MortonBuildRecord<Node*>& current, isa::MortonBuildRecord<Node*>* children, size_t N, FastAllocator::ThreadLocal* alloc ) -> InnerNode* { assert(N <= 2); InnerNode* node = new (alloc->malloc(sizeof(InnerNode))) InnerNode; *current.parent = node; for (size_t i=0; i<N; i++) children[i].parent = &node->children[i]; return node; }, /* lambda function that sets bounds */ [&] (InnerNode* node, const BBox3fa* bounds, size_t N) -> BBox3fa { BBox3fa res = empty; for (size_t i=0; i<N; i++) { const BBox3fa b = bounds[i]; res.extend(b); node->bounds[i] = b; } return res; }, /* lambda function that creates BVH leaves */ [&]( isa::MortonBuildRecord<Node*>& current, FastAllocator::ThreadLocal* alloc, BBox3fa& box_o) -> Node* { assert(current.size() == 1); const size_t id = morton_src[current.begin].index; const BBox3fa bounds = prims[id].bounds(); // FIXME: dont use morton_src, should be input Node* node = new (alloc->malloc(sizeof(LeafNode))) LeafNode(id,bounds); *current.parent = node; box_o = bounds; return node; }, /* lambda that calculates the bounds for some primitive */ [&] (const isa::MortonID32Bit& morton) -> BBox3fa { return prims[morton.index].bounds(); }, /* progress monitor function */ [&] 
(size_t dn) { // throw an exception here to cancel the build operation }, morton_src.data(),morton_tmp.data(),prims.size(),2,1024,1,1); Node* root = node_bounds.first; double t1 = getSeconds(); std::cout << 1000.0f*(t1-t0) << "ms, " << 1E-6*double(N)/(t1-t0) << " Mprims/s, sah = " << root->sah() << " [DONE]" << std::endl; } }