Beispiel #1
0
    bool BVH4BuilderFast::splitParallel(BuildRecord &current, BuildRecord &leftChild, BuildRecord &rightChild, const size_t threadID, const size_t numThreads)
    {
      const unsigned int items = current.end - current.begin;
      assert(items >= BUILD_RECORD_SPLIT_THRESHOLD);
      
      /* mark as leaf if leaf threshold reached */
      if (items <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) {
        current.createLeaf();
        return false;
      }
      
      /* use primitive array temporarily for parallel splits */
      PrimRef* tmp = (PrimRef*) primAllocator.base();

      /* parallel binning of centroids */
      g_state->parallelBinner.bin(current,prims,tmp,threadID,numThreads);
      
      /* find best split */
      Split split; 
      g_state->parallelBinner.best(split);
      
      /* if we cannot find a valid split, enforce an arbitrary split */
      if (unlikely(split.pos == -1)) split_fallback(prims,current,leftChild,rightChild);
      
      /* parallel partitioning of items */
      else g_state->parallelBinner.partition(tmp,prims,split,leftChild,rightChild,threadID,numThreads);
      
      if (leftChild.items()  <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) leftChild.createLeaf();
      if (rightChild.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) rightChild.createLeaf();
      return true;
    }
Beispiel #2
0
 bool BVH4BuilderFast::splitSequential(BuildRecord& current, BuildRecord& leftChild, BuildRecord& rightChild)
 {
   /* mark as leaf if leaf threshold reached */
   if (current.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) {
     current.createLeaf();
     return false;
   }
   
   /* calculate binning function */
   Mapping<16> mapping(current.bounds);
   
   /* binning of centroids */
   Binner<16> binner;
   binner.bin(prims,current.begin,current.end,mapping);
   
   /* find best split */
   Split split; 
   binner.best(split,mapping);
   
   /* if we cannot find a valid split, enforce an arbitrary split */
   if (unlikely(split.pos == -1)) split_fallback(prims,current,leftChild,rightChild);
   
   /* partitioning of items */
   else binner.partition(prims, current.begin, current.end, split, mapping, leftChild, rightChild);
   
   if (leftChild.items()  <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) leftChild.createLeaf();
   if (rightChild.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) rightChild.createLeaf();	
   return true;
 }
 __forceinline void BVH4BuilderTopLevel::split(BuildRecord& current, BuildRecord& left, BuildRecord& right, const size_t mode, const size_t threadID, const size_t numThreads)
 {
   if (mode == BUILD_TOP_LEVEL && current.items() >= BUILD_RECORD_SPLIT_THRESHOLD)
     return split_parallel(current,left,right,threadID,numThreads);		  
   else
     return split_sequential(current,left,right);
 }
Beispiel #4
0
 void BVH4BuilderFast::createTriangle4vLeaf(const BVH4BuilderFast* This, BuildRecord& current, Allocator& leafAlloc, size_t threadID)
 {
   size_t items = current.items();
   size_t start = current.begin;
   assert(items<=4);
   
   /* allocate leaf node */
   Triangle4v* accel = (Triangle4v*) leafAlloc.malloc(sizeof(Triangle4v));
   *(NodeRef*)current.parentNode = This->bvh->encodeLeaf((char*)accel,1);
   
   ssei vgeomID = -1, vprimID = -1, vmask = -1;
   sse3f v0 = zero, v1 = zero, v2 = zero;
   
   for (size_t i=0; i<items; i++)
   {
     const size_t geomID = This->prims[start+i].geomID();
     const size_t primID = This->prims[start+i].primID();
     const TriangleMesh* __restrict__ const mesh = This->scene->getTriangleMesh(geomID);
     const TriangleMesh::Triangle& tri = mesh->triangle(primID);
     const Vec3fa& p0 = mesh->vertex(tri.v[0]);
     const Vec3fa& p1 = mesh->vertex(tri.v[1]);
     const Vec3fa& p2 = mesh->vertex(tri.v[2]);
     vgeomID [i] = geomID;
     vprimID [i] = primID;
     vmask   [i] = mesh->mask;
     v0.x[i] = p0.x; v0.y[i] = p0.y; v0.z[i] = p0.z;
     v1.x[i] = p1.x; v1.y[i] = p1.y; v1.z[i] = p1.z;
     v2.x[i] = p2.x; v2.y[i] = p2.y; v2.z[i] = p2.z;
   }
   Triangle4v::store_nt(accel,Triangle4v(v0,v1,v2,vgeomID,vprimID,vmask));
 }
Beispiel #5
0
 void BVH4BuilderFast::createTriangle1vLeaf(const BVH4BuilderFast* This, BuildRecord& current, Allocator& leafAlloc, size_t threadID)
 {
   size_t items = current.items();
   size_t start = current.begin;
   assert(items<=4);
   
   /* allocate leaf node */
   Triangle1v* accel = (Triangle1v*) leafAlloc.malloc(items*sizeof(Triangle1v));
   *(NodeRef*)current.parentNode = This->bvh->encodeLeaf((char*)accel,items);
   
   for (size_t i=0; i<items; i++) 
   {	
     const size_t geomID = This->prims[start+i].geomID();
     const size_t primID = This->prims[start+i].primID();
     const TriangleMesh* __restrict__ const mesh = This->scene->getTriangleMesh(geomID);
     const TriangleMesh::Triangle& tri = mesh->triangle(primID);
     
     const ssef v0 = select(0x7,(ssef)mesh->vertex(tri.v[0]),zero);
     const ssef v1 = select(0x7,(ssef)mesh->vertex(tri.v[1]),zero);
     const ssef v2 = select(0x7,(ssef)mesh->vertex(tri.v[2]),zero);
     
     const ssef e1 = v0 - v1;
     const ssef e2 = v2 - v0;	     
     const ssef normal = cross(e1,e2);
     
     store4f_nt(&accel[i].v0,cast(insert<3>(cast(v0),primID)));
     store4f_nt(&accel[i].v1,cast(insert<3>(cast(v1),geomID)));
     store4f_nt(&accel[i].v2,cast(insert<3>(cast(v2),mesh->mask)));
   }
 }
 __forceinline void BVH4BuilderTopLevel::recurse(size_t depth, BuildRecord& current, const size_t mode, const size_t threadID, const size_t numThreads)
 {
   if (mode == BUILD_TOP_LEVEL) {
     g_state->global_workStack.push_nolock(current);
   }
   else if (current.items() > THRESHOLD_FOR_SUBTREE_RECURSION) {
     if (!g_state->thread_workStack[threadID].push(current))
       recurseSAH(depth,current,RECURSE,threadID,numThreads);
   }
   else
     recurseSAH(depth,current,RECURSE,threadID,numThreads);
 }
Beispiel #7
0
 __forceinline void BVH4BuilderFast::recurse(BuildRecord& current, Allocator& nodeAlloc, Allocator& leafAlloc, const size_t mode, const size_t threadID, const size_t numThreads)
 {
   if (mode == BUILD_TOP_LEVEL) {
     g_state->workStack.push_nolock(current);
   }
   else if (mode == RECURSE_PARALLEL && current.items() > THRESHOLD_FOR_SUBTREE_RECURSION) {
     if (!g_state->threadStack[threadID].push(current))
       recurseSAH(current,nodeAlloc,leafAlloc,RECURSE_SEQUENTIAL,threadID,numThreads);
   }
   else
     recurseSAH(current,nodeAlloc,leafAlloc,mode,threadID,numThreads);
 }
Beispiel #8
0
    void BVH4BuilderFast::createLeaf(BuildRecord& current, Allocator& nodeAlloc, Allocator& leafAlloc, size_t threadIndex, size_t threadCount)
    {
#if defined(DEBUG)
      if (current.depth > BVH4::maxBuildDepthLeaf) 
        throw std::runtime_error("ERROR: depth limit reached");
#endif
      
      /* create leaf for few primitives */
      if (current.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) {
        createSmallLeaf(this,current,leafAlloc,threadIndex);
        return;
      }
      
      /* first split level */
      BuildRecord record0, record1;
      split_fallback(prims,current,record0,record1);
      
      /* second split level */
      BuildRecord children[4];
      split_fallback(prims,record0,children[0],children[1]);
      split_fallback(prims,record1,children[2],children[3]);

      /* allocate node */
      Node* node = (Node*) nodeAlloc.malloc(sizeof(Node)); node->clear();
      *(NodeRef*)current.parentNode = bvh->encodeNode(node);
      
      /* recurse into each child */
      for (size_t i=0; i<4; i++) 
      {
        node->set(i,children[i].bounds.geometry);
        children[i].parentNode = (size_t)&node->child(i);
        children[i].depth = current.depth+1;
        createLeaf(children[i],nodeAlloc,leafAlloc,threadIndex,threadCount);
      }
      BVH4::compact(node); // move empty nodes to the end
    }
Beispiel #9
0
    void BVH4BuilderFast::build_parallel(size_t threadIndex, size_t threadCount, size_t taskIndex, size_t taskCount, TaskScheduler::Event* event) 
    {
      /* wait for all threads to enter */
      g_state->barrier.wait(threadIndex,threadCount);
      
      /* start measurement */
      double t0 = 0.0f;
      if (g_verbose >= 2) t0 = getSeconds();
      
      /* all worker threads enter tasking system */
      if (threadIndex != 0) {
        g_state->scheduler.dispatchTaskMainLoop(threadIndex,threadCount); 
        return;
      }
      
      /* calculate list of primrefs */
      global_bounds.reset();
      g_state->scheduler.dispatchTask( task_computePrimRefs, this, threadIndex, threadCount );
      bvh->bounds = global_bounds.geometry;
      
      /* initialize node and leaf allocator */
      nodeAllocator.reset();
      primAllocator.reset();
      __aligned(64) Allocator nodeAlloc(nodeAllocator);
      __aligned(64) Allocator leafAlloc(primAllocator);

      /* create initial build record */
      BuildRecord br;
      br.init(global_bounds,0,numPrimitives);
      br.depth = 1;
      br.parentNode = (size_t)&bvh->root;
      
      /* initialize thread-local work stacks */
      for (size_t i=0; i<threadCount; i++)
        g_state->threadStack[i].reset();
      
      /* push initial build record to global work stack */
      g_state->workStack.reset();
      g_state->workStack.push_nolock(br);    
      
      /* work in multithreaded toplevel mode until sufficient subtasks got generated */
      while (g_state->workStack.size() < 4*threadCount && g_state->workStack.size()+BVH4::N <= SIZE_WORK_STACK) 
      {
        BuildRecord br;

        /* pop largest item for better load balancing */
        if (!g_state->workStack.pop_nolock_largest(br)) 
          break;
        
        /* guarantees to create no leaves in this stage */
        if (br.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD)
          break;

        recurseSAH(br,nodeAlloc,leafAlloc,BUILD_TOP_LEVEL,threadIndex,threadCount);
      }
      
      /* now process all created subtasks on multiple threads */
      g_state->scheduler.dispatchTask(task_buildSubTrees, this, threadIndex, threadCount );
      
      /* release all threads again */
      g_state->scheduler.releaseThreads(threadCount);
      
      /* stop measurement */
      if (g_verbose >= 2) dt = getSeconds()-t0;
    }