void BVH4BuilderTopLevel::recurseSAH(size_t depth, BuildRecord& task, const size_t mode, const size_t threadID, const size_t numThreads)
 {
   /* return leaf node */
   assert(task.end-task.begin > 0);
   if (unlikely(task.end-task.begin == 1)) {
     *(NodeRef*)task.parentNode = refs[task.begin].node;
     return;
   }
   
   /* create leaf node */
   if (unlikely(task.depth >= BVH4::maxBuildDepth)) {
     createLeaf(task,threadID,numThreads);
     return;
   }
   
   /*! initialize task list */
   BuildRecord childTasks[4];
   childTasks[0] = task;
   size_t numChildren = 1;
   
   /*! split until node is full */
   do {
     
     /*! find best child to split */
     float bestArea = inf; 
     ssize_t bestChild = -1;
     for (size_t i=0; i<numChildren; i++) 
     {
       float A = childTasks[i].sceneArea();
       size_t items = childTasks[i].items();
       if (items > 1 && A <= bestArea) { 
         bestChild = i; 
         bestArea = A; 
       }
     }
     if (bestChild == -1) break;
     
     /*! split best child into left and right child */
     __align(64) BuildRecord left, right;
     split(childTasks[bestChild],left,right,mode,threadID,numThreads);
     
     /* add new children left and right */
     left.depth = right.depth = task.depth+1;
     childTasks[bestChild] = childTasks[numChildren-1];
     childTasks[numChildren-1] = left;
     childTasks[numChildren+0] = right;
     numChildren++;
     
   } while (numChildren < 4);
   
   /* recurse */
   BVH4::Node* node = bvh->allocNode(threadID);
   for (ssize_t i=numChildren-1; i>=0; i--) {
     childTasks[i].parentNode = (size_t)&node->child(i);
     recurse(depth+1,childTasks[i],mode,threadID,numThreads);
     node->set(i,childTasks[i].bounds.geometry);
   }
   
   *(NodeRef*)task.parentNode = bvh->encodeNode(node);
 }
Exemple #2
0
  /* Recursively dumps a BVH4 subtree with Triangle4v leaves to std::cout.
   * 'node' is the subtree root, 'depth' the current nesting level (two
   * spaces of indentation are written per level). */
  void print_bvh4_triangle4v(BVH4::NodeRef node, size_t depth)
  {
    /* writes the indentation that belongs to the current nesting level */
    auto indent = [depth] () {
      for (size_t level=0; level<depth; level++) std::cout << "  ";
    };

    /* leaf: list every triangle stored in each Triangle4v block */
    if (!node.isNode())
    {
      size_t numBlocks; 
      const Triangle4v* blocks = (const Triangle4v*) node.leaf(numBlocks);

      std::cout << "Leaf {" << std::endl;
      for (size_t b=0; b<numBlocks; b++) {
        const Triangle4v& block = blocks[b];
        for (size_t t=0; t<block.size(); t++) {
          indent();
          std::cout << "  Triangle { v0 = (" << block.v0.x[t] << ", " << block.v0.y[t] << ", " << block.v0.z[t] << "),  "
            "v1 = (" << block.v1.x[t] << ", " << block.v1.y[t] << ", " << block.v1.z[t] << "), "
            "v2 = (" << block.v2.x[t] << ", " << block.v2.y[t] << ", " << block.v2.z[t] << "), "
            "geomID = " << block.geomID(t) << ", primID = " << block.primID(t) << " }" << std::endl;
        }
      }
      indent();
      std::cout << "}" << std::endl;
      return;
    }

    /* inner node: first the bounds of all BVH4::N slots ... */
    BVH4::Node* inner = node.node();
    std::cout << "Node {" << std::endl;
    for (size_t c=0; c<BVH4::N; c++) 
    {
      indent();
      std::cout << "  bounds" << c << " = " << inner->bounds(c) << std::endl;
    }

    /* ... then each non-empty child, printed one level deeper */
    for (size_t c=0; c<BVH4::N; c++) 
    {
      const bool vacant = (inner->child(c) == BVH4::emptyNode);
      if (vacant) continue;

      indent();
      std::cout << "  child" << c << " = ";
      print_bvh4_triangle4v(inner->child(c),depth+1); 
    }
    indent();
    std::cout << "}" << std::endl;
  }
    /*! Terminates the build for the reference range of 'current' without
     *  further heuristic splitting: an empty range yields BVH4::emptyNode,
     *  a single reference yields that reference's node, and anything larger
     *  is divided into four sub-ranges via split_fallback2 which are
     *  processed recursively. The result is stored through
     *  current.parentNode. */
    void BVH4BuilderTopLevel::createLeaf(BuildRecord& current, size_t threadIndex, size_t threadCount)
    {
#if defined(DEBUG)
      /* debug builds refuse to descend past the leaf depth limit */
      if (current.depth > BVH4::maxBuildDepthLeaf) 
        throw std::runtime_error("ERROR: depth limit reached");
#endif

      const auto numRefs = current.end-current.begin;

      /* no references: the parent slot becomes the empty node */
      if (numRefs == 0) {
        *(NodeRef*)current.parentNode = BVH4::emptyNode;
        return;
      }

      /* one reference: forward its node unchanged */
      if (numRefs == 1) {
        *(NodeRef*)current.parentNode = refs[current.begin].node;
        return;
      }

      /* two rounds of split_fallback2 turn the range into four records */
      BuildRecord halves[2];
      split_fallback2(&refs[0],current,halves[0],halves[1]);

      BuildRecord quarters[4];
      split_fallback2(&refs[0],halves[0],quarters[0],quarters[1]);
      split_fallback2(&refs[0],halves[1],quarters[2],quarters[3]);

      /* the new node is published to the parent before its children are filled in */
      BVH4::Node* node = bvh->allocNode(threadIndex);
      *(NodeRef*)current.parentNode = bvh->encodeNode(node);

      /* handle every quarter one level deeper and record its bounds */
      const auto childDepth = current.depth+1;
      for (size_t slot=0; slot<4; slot++) 
      {
        BuildRecord& child = quarters[slot];
        child.depth = childDepth;
        child.parentNode = (size_t)&node->child(slot);
        createLeaf(child,threadIndex,threadCount);
        node->set(slot,child.bounds.geometry);
      }

      /* move empty children to the end of the node */
      BVH4::compact(node);
    }  
 /*! Grows the build reference list by repeatedly "opening" the reference
  *  at the top of a max-heap over 'refs': the reference is removed and
  *  replaced by the non-empty children of its BVH4 node. Opening continues
  *  while one more opening (at most +3 entries net) still fits into the
  *  budget N = max(2*refs.size(), MIN_OPEN_SIZE), and stops as soon as the
  *  top reference is a leaf. The heap order is whatever BuildRef's
  *  comparison defines (presumably the size of the bounds - confirm).
  *
  *  On return 'refs' is not guaranteed to be a valid heap (a leaf at the
  *  top is left at the back after pop_heap); it is only meant to be used
  *  as a plain array afterwards. */
 void BVH4BuilderTopLevel::open_sequential()
 {
   size_t N = max(2*refs.size(),size_t(MIN_OPEN_SIZE));
   refs.reserve(N);
   
   std::make_heap(refs.begin(),refs.end());

   /* std::pop_heap and back() require a non-empty container; refs can be
      empty on entry or become empty after opening a node whose children
      are all empty, so that case has to end the loop explicitly */
   while (refs.size() != 0 && refs.size()+3 <= N)
   {
     /* move the top reference to the back and inspect it */
     std::pop_heap (refs.begin(),refs.end()); 
     BVH4::NodeRef ref = refs.back().node;

     /* the top reference cannot be opened any further: stop */
     if (ref.isLeaf()) break;
     refs.pop_back();    
     
     /* replace the removed reference by its non-empty children */
     BVH4::Node* node = ref.node();
     for (size_t i=0; i<4; i++) {
       if (node->child(i) == BVH4::emptyNode) continue;
       refs.push_back(BuildRef(node->bounds(i),node->child(i)));
       std::push_heap (refs.begin(),refs.end()); 
     }
   }
 }
    /*! Rebuilds the two-level BVH: one BVH per triangle mesh of the scene
     *  (stored in 'objects', built by the matching entry of 'builders')
     *  plus a top-level hierarchy over the roots of those per-mesh BVHs.
     *
     *  Steps, in order: drop per-object data for IDs beyond the current
     *  scene size, reset the allocator, create missing per-mesh BVHs,
     *  rebuild modified meshes, collect one BuildRef per non-empty mesh,
     *  open large references, and finally build the top level with
     *  BVHBuilderBinnedSAH.
     *
     *  threadIndex and threadCount are not used in this body; the
     *  per-mesh builders are invoked as build(0,0). */
    void BVH4BuilderTwoLevel::build(size_t threadIndex, size_t threadCount) 
    {
      /* delete the builders and BVHs of objects whose ID is no longer inside the scene */
      size_t N = scene->size();
      if (N < objects.size()) {
        parallel_for(N, objects.size(), [&] (const range<size_t>& r) {
            for (size_t i=r.begin(); i<r.end(); i++) {
              delete builders[i]; builders[i] = nullptr;
              delete objects[i]; objects[i] = nullptr;
            }
          });
      }

      /* reset memory allocator */
      bvh->alloc.reset();
      
      /* skip build for empty scene */
      const size_t numPrimitives = scene->getNumPrimitives<TriangleMesh,1>();
      if (numPrimitives == 0) {
        prims.resize(0);
        bvh->set(BVH4::emptyNode,empty,0);
        return;
      }

      /* t0 is handed back to postBuild() at the end of the function */
      double t0 = bvh->preBuild(TOSTRING(isa) "::BVH4BuilderTwoLevel");

      /* With PROFILE enabled the remaining build is wrapped in a lambda passed
         to profile(); a 'return' below then only leaves that lambda.
         NOTE(review): without PROFILE the two fast-path returns below leave
         build() directly, skipping bvh->alloc.cleanup() and bvh->postBuild(t0)
         - confirm this is intended. */
#if PROFILE
	profile(2,20,numPrimitives,[&] (ProfileTimer& timer)
        {
#endif
          
      /* resize object array if scene got larger */
      if (objects.size()  < N) objects.resize(N);
      if (builders.size() < N) builders.resize(N);
      if (refs.size()     < N) refs.resize(N);
      nextRef = 0;
      
      /* create acceleration structures for meshes that do not have one yet */
      parallel_for(size_t(0), N, [&] (const range<size_t>& r) 
      {
        for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
        {
          TriangleMesh* mesh = scene->getTriangleMeshSafe(objectID);
          
          /* verify meshes got deleted properly */
          if (mesh == nullptr || mesh->numTimeSteps != 1) {
            assert(objectID < objects.size () && objects[objectID] == nullptr);
            assert(objectID < builders.size() && builders[objectID] == nullptr);
            continue;
          }
          
          /* create BVH and builder for new meshes */
          if (objects[objectID] == nullptr)
            createTriangleMeshAccel(mesh,(AccelData*&)objects[objectID],builders[objectID]);
        }
      });

      /* parallel build of acceleration structures */
      parallel_for(size_t(0), N, [&] (const range<size_t>& r) 
      {
        for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
        {
          /* ignore if no triangle mesh or not enabled */
          TriangleMesh* mesh = scene->getTriangleMeshSafe(objectID);
          if (mesh == nullptr || !mesh->isEnabled() || mesh->numTimeSteps != 1) 
            continue;
        
          BVH4*    object  = objects [objectID]; assert(object);
          Builder* builder = builders[objectID]; assert(builder);
          
          /* build object if it got modified (under PROFILE every object is rebuilt) */
#if !PROFILE 
          if (mesh->isModified()) 
#endif
            builder->build(0,0);
          
          /* create build primitive for every mesh with non-empty bounds */
          /* NOTE(review): nextRef++ runs inside a parallel_for body; this is only
             safe if nextRef is an atomic counter - confirm its declaration */
          if (!object->bounds.empty())
            refs[nextRef++] = BVH4BuilderTwoLevel::BuildRef(object->bounds,object->root);
        }
      });
      
      /* fast path for single geometry scenes: reuse the mesh BVH root as top-level root */
      if (nextRef == 1) { 
        bvh->set(refs[0].node,refs[0].bounds(),numPrimitives);
        return;
      }

      /* open all large nodes */
      refs.resize(nextRef);
      open_sequential(numPrimitives); 
      
      /* fast path for small geometries */
      if (refs.size() == 1) { 
        bvh->set(refs[0].node,refs[0].bounds(),numPrimitives);
        return;
      }

      /* compute PrimRefs: one PrimRef per build reference, carrying the node
         reference as its ID, and accumulate the overall PrimInfo */
      prims.resize(refs.size());
      const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), size_t(1024), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo
      {
        PrimInfo pinfo(empty);
        for (size_t i=r.begin(); i<r.end(); i++) {
          pinfo.add(refs[i].bounds());
          prims[i] = PrimRef(refs[i].bounds(),(size_t)refs[i].node);
        }
        return pinfo;
      }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });

      /* skip if all objects were empty */
      if (pinfo.size() == 0)
        bvh->set(BVH4::emptyNode,empty,0);

      /* otherwise build toplevel hierarchy */
      else
      {
        BVH4::NodeRef root;
        BVHBuilderBinnedSAH::build<BVH4::NodeRef>
          (root,
           /* provides the allocator handle passed to the callbacks below */
           [&] { return bvh->alloc.threadLocal2(); },
           /* inner node callback: allocates a node, stores the child bounds and
              points every child record at its slot inside the new node */
           [&] (const isa::BVHBuilderBinnedSAH::BuildRecord& current, BVHBuilderBinnedSAH::BuildRecord* children, const size_t N, FastAllocator::ThreadLocal2* alloc) -> int
           {
             BVH4::Node* node = (BVH4::Node*) alloc->alloc0.malloc(sizeof(BVH4::Node)); node->clear();
             for (size_t i=0; i<N; i++) {
               node->set(i,children[i].pinfo.geomBounds);
               children[i].parent = (size_t*)&node->child(i);
             }
             *current.parent = bvh->encodeNode(node);
             return 0;
           },
           /* leaf callback: a leaf holds exactly one PrimRef whose ID is the
              root of a per-mesh BVH, which is linked in directly */
           [&] (const BVHBuilderBinnedSAH::BuildRecord& current, FastAllocator::ThreadLocal2* alloc) -> int
           {
             assert(current.prims.size() == 1);
             *current.parent = (BVH4::NodeRef) prims[current.prims.begin()].ID();
             return 1;
           },
           /* progress callback: dn is ignored, the monitor is always called with 0 */
           [&] (size_t dn) { bvh->scene->progressMonitor(0); },
           /* NOTE(review): the meaning of the trailing positional settings
              (4,1,1,1.0f,1.0f) is defined by BVHBuilderBinnedSAH::build, not visible here */
           prims.data(),pinfo,BVH4::N,BVH4::maxBuildDepthLeaf,4,1,1,1.0f,1.0f);
        
        bvh->set(root,pinfo.geomBounds,numPrimitives);
      }

#if PROFILE
      }); 
#endif

      bvh->alloc.cleanup();
      bvh->postBuild(t0);
    }
      /*! Builds the BVH4 over the scene's Bezier curves (BezierCurves with
       *  one time step) using bvh_obb_builder_binned_sah, which may create
       *  both axis-aligned (BVH4::Node) and oriented (BVH4::UnalignedNode)
       *  inner nodes through the callbacks supplied below. The two size_t
       *  parameters are unnamed and unused. */
      void build(size_t, size_t) 
      {
        /* progress monitor: forwards the progress delta to the scene's monitor */
        auto progress = [&] (size_t dn) { bvh->scene->progressMonitor(dn); };
        auto virtualprogress = BuildProgressMonitorFromClosure(progress);

        /* fast path for empty BVH */
        const size_t numPrimitives = scene->getNumPrimitives<BezierCurves,1>();
        if (numPrimitives == 0) {
          prims.clear();
          bvh->set(BVH4::emptyNode,empty,0);
          return;
        }

        /* t0 is handed back to postBuild() at the end of the function */
        double t0 = bvh->preBuild(TOSTRING(isa) "::BVH4BuilderHairSAH");

        //profile(1,5,numPrimitives,[&] (ProfileTimer& timer) {
        
        /* create primref array */
        bvh->alloc.init_estimate(numPrimitives*sizeof(Primitive));
        prims.resize(numPrimitives);
        const PrimInfo pinfo = createBezierRefArray<1>(scene,prims,virtualprogress);
        
        /* build hierarchy */
        BVH4::NodeRef root = bvh_obb_builder_binned_sah
          (
            /* provides the allocator handle passed to the callbacks below */
            [&] () { return bvh->alloc.threadLocal2(); },

            /* aligned node callback: stores the geometry bounds of each child;
               the heuristic argument is not used here */
            [&] (const PrimInfo* children, const size_t numChildren, 
                 HeuristicArrayBinningSAH<BezierPrim> alignedHeuristic, 
                 FastAllocator::ThreadLocal2* alloc) -> BVH4::Node* 
            {
              BVH4::Node* node = (BVH4::Node*) alloc->alloc0.malloc(sizeof(BVH4::Node),16); node->clear();
              for (size_t i=0; i<numChildren; i++)
                node->set(i,children[i].geomBounds);
              return node;
            },
            
            /* unaligned node callback: for each child a space is computed by the
               heuristic and the child's bounds in that space form its OBBox3fa */
            [&] (const PrimInfo* children, const size_t numChildren, 
                 UnalignedHeuristicArrayBinningSAH<BezierPrim> unalignedHeuristic, 
                 FastAllocator::ThreadLocal2* alloc) -> BVH4::UnalignedNode*
            {
              BVH4::UnalignedNode* node = (BVH4::UnalignedNode*) alloc->alloc0.malloc(sizeof(BVH4::UnalignedNode),16); node->clear();
              for (size_t i=0; i<numChildren; i++) 
              {
                const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]); 
                const PrimInfo       sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
                node->set(i,OBBox3fa(space,sinfo.geomBounds));
              }
              return node;
            },

            /* leaf callback: allocates one Primitive per item of the range and
               fills each from the primref array */
            [&] (size_t depth, const PrimInfo& pinfo, FastAllocator::ThreadLocal2* alloc) -> BVH4::NodeRef
            {
              size_t items = pinfo.size();
              size_t start = pinfo.begin;
              Primitive* accel = (Primitive*) alloc->alloc1.malloc(items*sizeof(Primitive));
              BVH4::NodeRef node = bvh->encodeLeaf((char*)accel,items);
              /* NOTE(review): 'start' is passed unchanged by this loop; presumably
                 fill() takes it by reference and advances it - confirm */
              for (size_t i=0; i<items; i++) {
                accel[i].fill(prims.data(),start,pinfo.end,bvh->scene,false);
              }
              return node;
            },
            progress,
            /* NOTE(review): the meaning of the trailing positional settings (1,1,...)
               is defined by bvh_obb_builder_binned_sah, not visible here */
            prims.data(),pinfo,BVH4::N,BVH4::maxBuildDepthLeaf,1,1,BVH4::maxLeafBlocks);
        
        bvh->set(root,pinfo.geomBounds,pinfo.size());
        
        //});
        
        /* clear temporary data for static geometry */
        if (scene->isStatic()) prims.clear();
        bvh->alloc.cleanup();
        bvh->postBuild(t0);
      }
    /*! Parallel variant of node opening, executed by every thread of the task.
     *
     *  The first global_dest entries of 'refs' are divided evenly among the
     *  threads; each thread copies its share into its own, larger slice of
     *  'refs1', keeps it as a max-heap and repeatedly opens its largest
     *  references in rounds that are synchronized through 'barrier'. At the
     *  end all slices are packed back into 'refs' and global_dest holds the
     *  new number of entries.
     *
     *  taskIndex, taskCount and event are not used in this body. */
    void BVH4BuilderTopLevel::task_open_parallel(size_t threadIndex, size_t threadCount, size_t taskIndex, size_t taskCount, TaskScheduler::Event* event)
    {
      /* [start0,end0) is this thread's share of refs, [start1,end1) its slice of refs1 */
      size_t N = global_dest;
      size_t M = refs1.size();
      const size_t start0 = (threadIndex+0)*N/threadCount;
      const size_t end0   = (threadIndex+1)*N/threadCount;
      const size_t start1 = (threadIndex+0)*M/threadCount;
      const size_t end1   = (threadIndex+1)*M/threadCount;
      /* the refs1 slice has to be able to hold at least the copied items */
      assert(end1-start1 >= end0-start0);
      BuildRef* prefs1 = &refs1[0];
      
      /* copy from refs buffer to refs1 buffer */
      for (size_t i=start0, j=start1; i<end0; i++, j++) 
        refs1[j] = refs[i];
      
      /* create max heap in our set of items; [start,end) is the occupied part of the slice */
      size_t start = start1;
      size_t end   = start1+end0-start0;
      std::make_heap(&prefs1[start],&prefs1[end]);
      float max_volume = 0.0f;
      
      /* one iteration per opening round; every thread takes part in each
         barrier, including threads that have no room left to open nodes */
      while (true) 
      {
        /* thread 0 resets the shared maximum between two barriers */
        barrier.wait(threadIndex,threadCount);
        if (threadIndex == 0) global_max_volume = 0.0f;
        barrier.wait(threadIndex,threadCount);
        
        /* parallel calculation of maximal volume (lower.w is used as the volume);
           a thread whose slice cannot take one more opening contributes 0 */
        max_volume = 0.0f;
        if (end+3 <= end1)
          for (size_t i=start; i<end; i++)
            max_volume = max(max_volume,prefs1[i].lower.w);
        
        atomic_max_f32(&global_max_volume,max_volume);
        
        /* read the global maximum only after every thread has contributed */
        barrier.wait(threadIndex,threadCount);
        max_volume = global_max_volume;
        barrier.wait(threadIndex,threadCount);
        
        /* if maximal volume is 0, all threads are finished */
        if (max_volume == 0.0f) break;
                
        /* open all nodes that are considered large in this iteration;
           one opening removes a reference and adds up to four, hence the +3 */
        while (end+3 <= end1)
        {
		  if (end-start == 0) break;
          std::pop_heap(&prefs1[start],&prefs1[end]); 
          BVH4::NodeRef ref = prefs1[end-1].node;
          float vol = prefs1[end-1].lower.w;
          /* leaves and references below half the global maximum stay closed:
             restore the heap and end this round */
		  if (ref.isLeaf() || vol < 0.5f*max_volume) {
			std::push_heap(&prefs1[start],&prefs1[end]); 
			break;
		  }
          end--;
          
          /* replace the removed reference by its non-empty children */
          BVH4::Node* node = ref.node();
          for (size_t i=0; i<4; i++) {
            if (node->child(i) == BVH4::emptyNode) continue;
            prefs1[end++] = BuildRef(node->bounds(i),node->child(i));
            std::push_heap(&prefs1[start],&prefs1[end]); 
          }
        }
      }
      
      /* thread 0 resets the shared output counter before anyone reserves space */
      if (threadIndex == 0) global_dest = 0;
      barrier.wait(threadIndex,threadCount);
      
      /* copy again back to refs array; each thread reserves its destination range
         through atomic_add (presumably returning the previous counter value - confirm) */
      size_t dest = atomic_add(&global_dest,end-start);
      for (size_t i=start, j=dest; i<end; i++, j++) 
        refs[j] = refs1[i];
    }