예제 #1
0
    void BVH4BuilderFast::build_sequential(size_t threadIndex, size_t threadCount) 
    {
      /* start measurement */
      double t0 = 0.0f;
      if (g_verbose >= 2) t0 = getSeconds();
      
      /* initialize node and leaf allocator */
      nodeAllocator.reset();
      primAllocator.reset();
      __aligned(64) Allocator nodeAlloc(nodeAllocator);
      __aligned(64) Allocator leafAlloc(primAllocator);
     
      /* create prim refs */
      global_bounds.reset();
      computePrimRefs(0,1);
      bvh->bounds = global_bounds.geometry;

      /* create initial build record */
      BuildRecord br;
      br.init(global_bounds,0,numPrimitives);
      br.depth = 1;
      br.parentNode = (size_t)&bvh->root;

      /* build BVH in single thread */
      recurseSAH(br,nodeAlloc,leafAlloc,RECURSE_SEQUENTIAL,threadIndex,threadCount);

      /* stop measurement */
      if (g_verbose >= 2) dt = getSeconds()-t0;
    }
예제 #2
0
    /*! Splits a build record into two children using the shared parallel binner.
     *
     *  \param current    record to split; turned into a leaf when it holds at
     *                    most QBVH_BUILDER_LEAF_ITEM_THRESHOLD items
     *  \param leftChild  receives the left part of the split
     *  \param rightChild receives the right part of the split
     *  \param threadID   forwarded to the parallel binner
     *  \param numThreads forwarded to the parallel binner
     *  \return false if current was marked as a leaf (children untouched),
     *          true if leftChild/rightChild were filled */
    bool BVH4BuilderFast::splitParallel(BuildRecord &current, BuildRecord &leftChild, BuildRecord &rightChild, const size_t threadID, const size_t numThreads)
    {
      const unsigned int numItems = current.end - current.begin;
      assert(numItems >= BUILD_RECORD_SPLIT_THRESHOLD);

      /* small enough to become a leaf: nothing to split */
      if (numItems <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) {
        current.createLeaf();
        return false;
      }

      /* the primitive allocator's memory serves as scratch space for binning/partitioning */
      PrimRef* scratch = (PrimRef*) primAllocator.base();

      /* bin the centroids in parallel and query the best split */
      g_state->parallelBinner.bin(current,prims,scratch,threadID,numThreads);
      Split split; 
      g_state->parallelBinner.best(split);

      if (unlikely(split.pos == -1)) {
        /* no valid split found: enforce an arbitrary one */
        split_fallback(prims,current,leftChild,rightChild);
      }
      else {
        /* partition the items in parallel according to the split */
        g_state->parallelBinner.partition(scratch,prims,split,leftChild,rightChild,threadID,numThreads);
      }

      /* children at or below the leaf threshold are marked as leaves right away */
      if (leftChild.items()  <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) leftChild.createLeaf();
      if (rightChild.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) rightChild.createLeaf();
      return true;
    }
예제 #3
0
 /*! Packs the (at most four) triangles of a build record into one Triangle4v
  *  and stores a leaf reference to it through current.parent.
  *
  *  Lanes that receive no triangle keep geomID/primID/mask of -1 and
  *  all-zero vertices.
  *
  *  \param current   record whose primitives [begin, begin+size()) form the leaf
  *  \param leafAlloc allocator providing the Triangle4v storage
  *  \param threadID  not used by this implementation */
 void BVH4Triangle4vBuilderFast::createSmallLeaf(BuildRecord& current, Allocator& leafAlloc, size_t threadID)
 {
   const size_t count = current.size();
   const size_t first = current.begin;
   assert(count<=4);

   /* one Triangle4v block holds the whole leaf */
   Triangle4v* leaf = (Triangle4v*) leafAlloc.malloc(sizeof(Triangle4v));
   *current.parent = bvh->encodeLeaf((char*)leaf,1);

   /* per-lane data, pre-filled with the values used for unoccupied lanes */
   ssei vgeomID = -1, vprimID = -1, vmask = -1;
   sse3f v0 = zero, v1 = zero, v2 = zero;

   for (size_t lane=0; lane<count; lane++)
   {
     const size_t ref = first+lane;
     const size_t geomID = prims[ref].geomID();
     const size_t primID = prims[ref].primID();
     const TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(geomID);
     const TriangleMesh::Triangle& tri = mesh->triangle(primID);

     /* fetch the three corner positions of the triangle */
     const Vec3fa& a = mesh->vertex(tri.v[0]);
     const Vec3fa& b = mesh->vertex(tri.v[1]);
     const Vec3fa& c = mesh->vertex(tri.v[2]);

     /* write ids and mask into this lane */
     vgeomID [lane] = geomID;
     vprimID [lane] = primID;
     vmask   [lane] = mesh->mask;

     /* write the vertices in structure-of-arrays form */
     v0.x[lane] = a.x; v0.y[lane] = a.y; v0.z[lane] = a.z;
     v1.x[lane] = b.x; v1.y[lane] = b.y; v1.z[lane] = b.z;
     v2.x[lane] = c.x; v2.y[lane] = c.y; v2.z[lane] = c.z;
   }

   /* write the assembled block into the leaf memory */
   Triangle4v::store_nt(leaf,Triangle4v(v0,v1,v2,vgeomID,vprimID,vmask));
 }
예제 #4
0
 /*! Creates a leaf of current.size() Triangle1v primitives (at most four) and
  *  stores a leaf reference to it through current.parent.
  *
  *  Each Triangle1v stores the three vertices; the fourth component of
  *  v0/v1/v2 is overwritten with primID, geomID and the mesh mask.
  *
  *  \param current   record whose primitives [begin, begin+size()) form the leaf
  *  \param leafAlloc allocator providing the Triangle1v storage
  *  \param threadID  not used by this implementation */
 void BVH4Triangle1vBuilderFast::createSmallLeaf(BuildRecord& current, Allocator& leafAlloc, size_t threadID)
 {
   size_t items = current.size();
   size_t start = current.begin;
   assert(items<=4);
   
   /* allocate leaf node */
   Triangle1v* accel = (Triangle1v*) leafAlloc.malloc(items*sizeof(Triangle1v));
   *current.parent = bvh->encodeLeaf((char*)accel,items);
   
   for (size_t i=0; i<items; i++) 
   {	
     const size_t geomID = prims[start+i].geomID();
     const size_t primID = prims[start+i].primID();
     const TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(geomID);
     const TriangleMesh::Triangle& tri = mesh->triangle(primID);
     
     /* load the vertices; mask 0x7 presumably keeps x,y,z and takes the 4th lane from zero */
     const ssef v0 = select(0x7,(ssef)mesh->vertex(tri.v[0]),zero);
     const ssef v1 = select(0x7,(ssef)mesh->vertex(tri.v[1]),zero);
     const ssef v2 = select(0x7,(ssef)mesh->vertex(tri.v[2]),zero);
     
     /* note: the edge vectors and normal previously computed here were never
        stored or read, so that dead computation has been removed */
     
     /* store each vertex with primID / geomID / mask inserted at component 3 */
     store4f_nt(&accel[i].v0,cast(insert<3>(cast(v0),primID)));
     store4f_nt(&accel[i].v1,cast(insert<3>(cast(v1),geomID)));
     store4f_nt(&accel[i].v2,cast(insert<3>(cast(v2),mesh->mask)));
   }
 }
예제 #5
0
 /*! Dispatches to the parallel or the sequential split routine.
  *
  *  The parallel split is used only in BUILD_TOP_LEVEL mode and only when the
  *  record holds at least BUILD_RECORD_SPLIT_THRESHOLD items; every other
  *  case uses the sequential split.
  *
  *  \param current    record to split
  *  \param left,right receive the two halves
  *  \param mode       current build mode
  *  \param threadID   forwarded to split_parallel
  *  \param numThreads forwarded to split_parallel */
 __forceinline void BVH4BuilderTopLevel::split(BuildRecord& current, BuildRecord& left, BuildRecord& right, const size_t mode, const size_t threadID, const size_t numThreads)
 {
   const bool useParallel = (mode == BUILD_TOP_LEVEL) && (current.items() >= BUILD_RECORD_SPLIT_THRESHOLD);

   if (useParallel) {
     split_parallel(current,left,right,threadID,numThreads);
   }
   else {
     split_sequential(current,left,right);
   }
 }
예제 #6
0
 /*! Decides how a child build record is processed further.
  *
  *  - BUILD_TOP_LEVEL: the record is pushed onto the global work stack.
  *  - otherwise, records with more than THRESHOLD_FOR_SUBTREE_RECURSION items
  *    are offered to this thread's work stack; if that push fails the record
  *    is built immediately.
  *  - all remaining records are built immediately via recurseSAH.
  *
  *  \param depth      forwarded to recurseSAH
  *  \param current    record to process
  *  \param mode       current build mode
  *  \param threadID   selects the thread-local work stack
  *  \param numThreads forwarded to recurseSAH */
 __forceinline void BVH4BuilderTopLevel::recurse(size_t depth, BuildRecord& current, const size_t mode, const size_t threadID, const size_t numThreads)
 {
   /* top-level phase: only collect work, do not recurse */
   if (mode == BUILD_TOP_LEVEL) {
     g_state->global_workStack.push_nolock(current);
     return;
   }

   /* large subtree: try to defer it to the thread-local stack */
   const bool isLarge = current.items() > THRESHOLD_FOR_SUBTREE_RECURSION;
   if (isLarge && g_state->thread_workStack[threadID].push(current))
     return;

   /* small subtree, or the push failed: build it right here */
   recurseSAH(depth,current,RECURSE,threadID,numThreads);
 }
예제 #7
0
 /*! Decides how a child build record is processed further.
  *
  *  - BUILD_TOP_LEVEL: the record is pushed onto the global work stack.
  *  - RECURSE_PARALLEL with more than THRESHOLD_FOR_SUBTREE_RECURSION items:
  *    the record is offered to this thread's stack; if that push fails it is
  *    built immediately in RECURSE_SEQUENTIAL mode.
  *  - everything else is built immediately in the unchanged mode.
  *
  *  \param current    record to process
  *  \param nodeAlloc  node allocator forwarded to recurseSAH
  *  \param leafAlloc  leaf allocator forwarded to recurseSAH
  *  \param mode       current build mode
  *  \param threadID   selects the thread-local stack
  *  \param numThreads forwarded to recurseSAH */
 __forceinline void BVH4BuilderFast::recurse(BuildRecord& current, Allocator& nodeAlloc, Allocator& leafAlloc, const size_t mode, const size_t threadID, const size_t numThreads)
 {
   /* top-level phase: only collect work, do not recurse */
   if (mode == BUILD_TOP_LEVEL) {
     g_state->workStack.push_nolock(current);
     return;
   }

   /* only large records in parallel mode are candidates for deferral */
   const bool deferrable = (mode == RECURSE_PARALLEL) && (current.items() > THRESHOLD_FOR_SUBTREE_RECURSION);
   if (!deferrable) {
     recurseSAH(current,nodeAlloc,leafAlloc,mode,threadID,numThreads);
     return;
   }

   /* the push failed: build the subtree sequentially on this thread */
   if (!g_state->threadStack[threadID].push(current))
     recurseSAH(current,nodeAlloc,leafAlloc,RECURSE_SEQUENTIAL,threadID,numThreads);
 }
예제 #8
0
 /*! Splits a build record into two children using a local 16-bin binner.
  *
  *  \param current    record to split; turned into a leaf when it holds at
  *                    most QBVH_BUILDER_LEAF_ITEM_THRESHOLD items
  *  \param leftChild  receives the left part of the split
  *  \param rightChild receives the right part of the split
  *  \return false if current was marked as a leaf (children untouched),
  *          true if leftChild/rightChild were filled */
 bool BVH4BuilderFast::splitSequential(BuildRecord& current, BuildRecord& leftChild, BuildRecord& rightChild)
 {
   /* small enough to become a leaf: nothing to split */
   if (current.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) {
     current.createLeaf();
     return false;
   }

   /* set up the binning function from the record's bounds and bin the centroids */
   Mapping<16> binMapping(current.bounds);
   Binner<16> bins;
   bins.bin(prims,current.begin,current.end,binMapping);

   /* query the best split among the bins */
   Split best; 
   bins.best(best,binMapping);

   if (unlikely(best.pos == -1)) {
     /* no valid split found: enforce an arbitrary one */
     split_fallback(prims,current,leftChild,rightChild);
   }
   else {
     /* partition the items according to the chosen split */
     bins.partition(prims, current.begin, current.end, best, binMapping, leftChild, rightChild);
   }

   /* children at or below the leaf threshold are marked as leaves right away */
   if (leftChild.items()  <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) leftChild.createLeaf();
   if (rightChild.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) rightChild.createLeaf();
   return true;
 }
예제 #9
0
    /*! Creates a leaf of AccelSetItem entries, one per primitive of the record,
     *  and stores a leaf reference to it through current.parent.
     *
     *  \param current   record whose primitives [begin, begin+size()) form the leaf
     *  \param leafAlloc allocator providing the AccelSetItem storage
     *  \param threadID  not used by this implementation */
    void BVH4UserGeometryBuilderFast::createSmallLeaf(BuildRecord& current, Allocator& leafAlloc, size_t threadID)
    {
      const size_t count = current.size();
      const size_t first = current.begin;

      /* allocate storage for all items of the leaf and publish the leaf reference */
      AccelSetItem* leaf = (AccelSetItem*) leafAlloc.malloc(sizeof(AccelSetItem)*count);
      *current.parent = bvh->encodeLeaf(leaf,count);

      for (size_t k=0; k<count; k++)
      {
        const PrimRef& ref = prims[first+k];
        AccelSetItem& entry = leaf[k];

        /* the geometry is looked up in the scene by its geomID */
        entry.accel = (AccelSet*) (UserGeometryBase*) scene->get(ref.geomID());
        entry.item  = ref.primID();
      }
    }
예제 #10
0
    /*! Creates a leaf of Bezier1i primitives, one per primitive of the record,
     *  and stores a leaf reference to it through current.parent.
     *
     *  Each Bezier1i is constructed in place from the address of the curve's
     *  first vertex together with its geomID and primID.
     *
     *  \param current   record whose primitives [begin, begin+size()) form the leaf
     *  \param leafAlloc allocator providing the Bezier1i storage
     *  \param threadID  not used by this implementation */
    void BVH4Bezier1iBuilderFast::createSmallLeaf(BuildRecord& current, Allocator& leafAlloc, size_t threadID)
    {
      const size_t count = current.size();
      const size_t first = current.begin;

      /* allocate storage for all curves of the leaf and publish the leaf reference */
      Bezier1i* leaf = (Bezier1i*) leafAlloc.malloc(count*sizeof(Bezier1i));
      *current.parent = bvh->encodeLeaf((char*)leaf,count);

      for (size_t k=0; k<count; k++) 
      {
        const size_t ref = first+k;
        const size_t geomID = prims[ref].geomID();
        const size_t primID = prims[ref].primID();

        /* locate the first control point of this curve in its geometry */
        const BezierCurves* curves = scene->getBezierCurves(geomID);
        const Vec3fa& firstVertex = curves->vertex(curves->curve(primID));

        /* placement-construct the primitive directly in the leaf memory */
        new (&leaf[k]) Bezier1i(&firstVertex,geomID,primID);
      }
    }
예제 #11
0
    /*! Packs the (at most four) triangles of a build record into one Triangle4i
     *  and stores a leaf reference to it through current.parent.
     *
     *  For every triangle the leaf keeps a pointer to vertex 0 plus the
     *  offsets of vertices 1 and 2 relative to it, measured in int-sized
     *  units. Unused lanes get ids of -1, zero offsets, and reuse the base
     *  pointer of lane 0.
     *
     *  \param current   record whose primitives [begin, begin+size()) form the leaf
     *  \param leafAlloc allocator providing the Triangle4i storage
     *  \param threadID  not used by this implementation */
    void BVH4Triangle4iBuilderFast::createSmallLeaf(BuildRecord& current, Allocator& leafAlloc, size_t threadID)
    {
      const size_t count = current.size();
      const size_t first = current.begin;
      assert(count<=4);

      /* one Triangle4i block holds the whole leaf */
      Triangle4i* leaf = (Triangle4i*) leafAlloc.malloc(sizeof(Triangle4i));
      *current.parent = bvh->encodeLeaf((char*)leaf,1);

      /* per-lane data */
      ssei vgeomID = -1, vprimID = -1;
      Vec3f* base[4] = { NULL, NULL, NULL, NULL };
      ssei ofs1 = zero, ofs2 = zero;

      /* lanes that hold a real triangle */
      for (size_t lane=0; lane<count; lane++)
      {
        const PrimRef& ref = prims[first+lane];
        const TriangleMesh* mesh = scene->getTriangleMesh(ref.geomID());
        const TriangleMesh::Triangle& tri = mesh->triangle(ref.primID());

        vgeomID[lane] = ref.geomID();
        vprimID[lane] = ref.primID();

        /* base pointer to vertex 0, then the offsets of vertices 1 and 2 from it */
        base[lane] = (Vec3f*) &mesh->vertex(tri.v[0]); 
        int* const origin = (int*)base[lane];
        ofs1[lane] = (int*)&mesh->vertex(tri.v[1])-origin; 
        ofs2[lane] = (int*)&mesh->vertex(tri.v[2])-origin; 
      }

      /* remaining lanes are padded with invalid ids and zero offsets */
      for (size_t lane=count; lane<4; lane++)
      {
        vgeomID[lane] = -1;
        vprimID[lane] = -1;
        base[lane] = base[0];
        ofs1[lane] = 0; 
        ofs2[lane] = 0;
      }

      /* placement-construct the block directly in the leaf memory */
      new (leaf) Triangle4i(base,ofs1,ofs2,vgeomID,vprimID);
    }
예제 #12
0
    /*! Turns a build record into leaf geometry, recursing if it is too large.
     *
     *  Records with at most QBVH_BUILDER_LEAF_ITEM_THRESHOLD items are handed
     *  to createSmallLeaf. Larger records are divided with split_fallback in
     *  two levels into four parts, a node is allocated for them, and
     *  createLeaf is applied to each part.
     *
     *  \param current     record to convert; its parentNode receives the result
     *  \param nodeAlloc   allocator for inner nodes
     *  \param leafAlloc   allocator for leaf data
     *  \param threadIndex forwarded to createSmallLeaf and to the recursion
     *  \param threadCount forwarded to the recursion
     *
     *  In DEBUG builds a std::runtime_error is thrown when current.depth
     *  exceeds BVH4::maxBuildDepthLeaf. */
    void BVH4BuilderFast::createLeaf(BuildRecord& current, Allocator& nodeAlloc, Allocator& leafAlloc, size_t threadIndex, size_t threadCount)
    {
#if defined(DEBUG)
      if (current.depth > BVH4::maxBuildDepthLeaf) 
        throw std::runtime_error("ERROR: depth limit reached");
#endif

      /* few enough primitives: emit the leaf directly */
      if (current.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD) {
        createSmallLeaf(this,current,leafAlloc,threadIndex);
        return;
      }

      /* divide into two halves ... */
      BuildRecord half0, half1;
      split_fallback(prims,current,half0,half1);

      /* ... and each half again, yielding four parts */
      BuildRecord parts[4];
      split_fallback(prims,half0,parts[0],parts[1]);
      split_fallback(prims,half1,parts[2],parts[3]);

      /* allocate a cleared node and publish it through the parent reference */
      Node* node = (Node*) nodeAlloc.malloc(sizeof(Node));
      node->clear();
      *(NodeRef*)current.parentNode = bvh->encodeNode(node);

      /* attach every part to the node and convert it into leaves */
      for (size_t c=0; c<4; c++) 
      {
        BuildRecord& part = parts[c];
        node->set(c,part.bounds.geometry);
        part.depth = current.depth+1;
        part.parentNode = (size_t)&node->child(c);
        createLeaf(part,nodeAlloc,leafAlloc,threadIndex,threadCount);
      }

      /* move empty nodes to the end */
      BVH4::compact(node);
    }
예제 #13
0
    /*! Builds the top-level BVH over the collected build references.
     *
     *  Steps, in order: merge the per-thread bounds, size the reference
     *  arrays, open large nodes in parallel, seed the global work stack with a
     *  root record, expand it on this thread in BUILD_TOP_LEVEL mode until
     *  enough subtasks exist, and finally build all subtrees via the task
     *  scheduler.
     *
     *  \param threadIndex index of the calling thread, forwarded to the scheduler and recurseSAH
     *  \param threadCount number of threads; also the number of per-thread bounds merged
     *
     *  Returns early, after setting bvh->bounds, when there are no references.
     *  With g_verbose >= 2, progress, timing and BVH statistics are printed. */
    void BVH4BuilderTopLevel::build_toplevel(size_t threadIndex, size_t threadCount)
    {
      /* calculate scene bounds */
      Centroid_Scene_AABB bounds; bounds.reset();
      for (size_t i=0; i<threadCount; i++)
        bounds.extend(g_state->thread_bounds[i]);
      
      /* ignore empty scenes */
      //bvh->clear();
      bvh->bounds = bounds.geometry;
      refs.resize(nextRef);
      if (refs.size() == 0) return;
      
      double t0 = 0.0;
      if (g_verbose >= 2) {
        std::cout << "building BVH4<" << bvh->primTy.name << "> with toplevel SAH builder ... " << std::flush;
        t0 = getSeconds();
      }
      
      /* open all large nodes */
#if 0
      open_sequential();
      refs1.resize(refs.size());
#else
      /* grow both arrays to at least twice the current count (and at least
         MIN_OPEN_SIZE) before the parallel open task runs; afterwards refs is
         resized to global_dest, which the open task presumably updates to the
         final reference count */
      global_dest = refs.size();
      size_t M = max(size_t(2*global_dest),size_t(MIN_OPEN_SIZE));
      refs .resize(M);
      refs1.resize(M);
      barrier.init(threadCount);
      TaskScheduler::executeTask(threadIndex,threadCount,_task_open_parallel,this,threadCount,"toplevel_open_parallel");
      refs.resize(global_dest);
#endif
      bvh->init(refs.size());

      /* start toplevel build */
      BuildRecord task; 
      task.init(bounds,0,refs.size());
      task.parentNode = (size_t)&bvh->root;
      task.depth = 1;
      
      /* initialize thread-local work stacks */
      for (size_t i=0; i<threadCount; i++)
        g_state->thread_workStack[i].reset();
      
      /* push initial build record to global work stack */
      g_state->global_workStack.reset();
      g_state->global_workStack.push_nolock(task);    
      
      /* work in multithreaded toplevel mode until sufficient subtasks got generated */
      /* the second condition stops while BVH4::N more entries still fit into the stack */
      while (g_state->global_workStack.size() < 4*threadCount && g_state->global_workStack.size()+BVH4::N <= SIZE_WORK_STACK) 
      {
        BuildRecord br;
        if (!g_state->global_workStack.pop_nolock_largest(br)) break;
        recurseSAH(0,br,BUILD_TOP_LEVEL,threadIndex,threadCount);
      }
      
      /* now process all created subtasks on multiple threads */
      TaskScheduler::executeTask(threadIndex,threadCount,_task_build_subtrees,this,threadCount,"toplevel_build_subtrees");
      
      /* report elapsed time in milliseconds and the BVH statistics */
      if (g_verbose >= 2) {
        double t1 = getSeconds();
        std::cout << "[DONE]" << std::endl;
        std::cout << "  dt = " << 1000.0f*(t1-t0) << "ms" << std::endl;
        std::cout << BVH4Statistics(bvh).str();
      }
    }
예제 #14
0
    void BVH4BuilderFast::build_parallel(size_t threadIndex, size_t threadCount, size_t taskIndex, size_t taskCount, TaskScheduler::Event* event) 
    {
      /* wait for all threads to enter */
      g_state->barrier.wait(threadIndex,threadCount);
      
      /* start measurement */
      double t0 = 0.0f;
      if (g_verbose >= 2) t0 = getSeconds();
      
      /* all worker threads enter tasking system */
      if (threadIndex != 0) {
        g_state->scheduler.dispatchTaskMainLoop(threadIndex,threadCount); 
        return;
      }
      
      /* calculate list of primrefs */
      global_bounds.reset();
      g_state->scheduler.dispatchTask( task_computePrimRefs, this, threadIndex, threadCount );
      bvh->bounds = global_bounds.geometry;
      
      /* initialize node and leaf allocator */
      nodeAllocator.reset();
      primAllocator.reset();
      __aligned(64) Allocator nodeAlloc(nodeAllocator);
      __aligned(64) Allocator leafAlloc(primAllocator);

      /* create initial build record */
      BuildRecord br;
      br.init(global_bounds,0,numPrimitives);
      br.depth = 1;
      br.parentNode = (size_t)&bvh->root;
      
      /* initialize thread-local work stacks */
      for (size_t i=0; i<threadCount; i++)
        g_state->threadStack[i].reset();
      
      /* push initial build record to global work stack */
      g_state->workStack.reset();
      g_state->workStack.push_nolock(br);    
      
      /* work in multithreaded toplevel mode until sufficient subtasks got generated */
      while (g_state->workStack.size() < 4*threadCount && g_state->workStack.size()+BVH4::N <= SIZE_WORK_STACK) 
      {
        BuildRecord br;

        /* pop largest item for better load balancing */
        if (!g_state->workStack.pop_nolock_largest(br)) 
          break;
        
        /* guarantees to create no leaves in this stage */
        if (br.items() <= QBVH_BUILDER_LEAF_ITEM_THRESHOLD)
          break;

        recurseSAH(br,nodeAlloc,leafAlloc,BUILD_TOP_LEVEL,threadIndex,threadCount);
      }
      
      /* now process all created subtasks on multiple threads */
      g_state->scheduler.dispatchTask(task_buildSubTrees, this, threadIndex, threadCount );
      
      /* release all threads again */
      g_state->scheduler.releaseThreads(threadCount);
      
      /* stop measurement */
      if (g_verbose >= 2) dt = getSeconds()-t0;
    }
예제 #15
0
    /*! Builds the subtree for one build record.
     *
     *  The record is split repeatedly, always picking the non-leaf child with
     *  the largest sceneArea(), until BVH4::N children exist or no child can
     *  be split further. A node is then allocated, written through
     *  current.parentNode, and each child is handed to recurse(). If the depth
     *  limit is reached, the record is already a leaf, or no split succeeded,
     *  createLeaf() is called instead.
     *
     *  \param current    record to build; its parentNode receives the node reference
     *  \param nodeAlloc  allocator for inner nodes
     *  \param leafAlloc  allocator for leaf data
     *  \param mode       build mode, forwarded to split() and recurse()
     *  \param threadID   forwarded to split(), createLeaf() and recurse()
     *  \param numThreads forwarded to split(), createLeaf() and recurse() */
    void BVH4BuilderFast::recurseSAH(BuildRecord& current, Allocator& nodeAlloc, Allocator& leafAlloc, const size_t mode, const size_t threadID, const size_t numThreads)
    {
      __aligned(64) BuildRecord children[BVH4::N];
      
      /* create leaf node */
      if (current.depth >= BVH4::maxBuildDepth || current.isLeaf()) {
        /* leaves are not expected to be created during the top-level phase */
        assert(mode != BUILD_TOP_LEVEL);
        createLeaf(current,nodeAlloc,leafAlloc,threadID,numThreads);
        return;
      }

      /* fill all 4 children by always splitting the one with the largest surface area */
      unsigned int numChildren = 1;
      children[0] = current;

      do {
        
        /* find best child with largest bounding box area */
        int bestChild = -1;
        float bestArea = neg_inf;
        for (unsigned int i=0; i<numChildren; i++)
        {
          /* ignore leaves as they cannot get split */
          if (children[i].isLeaf())
            continue;
          
          /* remember child with largest area */
          if (children[i].sceneArea() > bestArea) { 
            bestArea = children[i].sceneArea();
            bestChild = i;
          }
        }
        /* every remaining child is a leaf: stop splitting */
        if (bestChild == -1) break;
        
        /*! split best child into left and right child */
        __aligned(64) BuildRecord left, right;
        /* on failure, 'continue' re-checks the loop condition and selects again;
           presumably the failed child was marked as a leaf by split() and is
           skipped from now on (splitSequential/splitParallel do so) - TODO confirm */
        if (!split(children[bestChild],left,right,mode,threadID,numThreads)) 
          continue;
        
        /* add new children left and right */
        /* the last child moves into the slot of the split child, 'left' takes
           the last slot and 'right' is appended behind it */
        left.depth = right.depth = current.depth+1;
        children[bestChild] = children[numChildren-1];
        children[numChildren-1] = left;
        children[numChildren+0] = right;
        numChildren++;
        
      } while (numChildren < BVH4::N);

      /* create leaf node if no split is possible */
      if (numChildren == 1) {
        assert(mode != BUILD_TOP_LEVEL);
        createLeaf(current,nodeAlloc,leafAlloc,threadID,numThreads);
        return;
      }
      
      /* allocate node */
      Node* node = (Node*) nodeAlloc.malloc(sizeof(Node)); node->clear();
      *(NodeRef*)current.parentNode = bvh->encodeNode(node);
      
      /* recurse into each child */
      for (unsigned int i=0; i<numChildren; i++) 
      {  
        /* store the child's bounds in the node and let the child write its
           own result into the matching child slot */
        node->set(i,children[i].bounds.geometry);
        children[i].parentNode = (size_t)&node->child(i);
        children[i].depth = current.depth+1;
        recurse(children[i],nodeAlloc,leafAlloc,mode,threadID,numThreads);
      }
    }