void BVH4BuilderTopLevel::build (size_t threadIndex, size_t threadCount, size_t objectID)
    {
      /* ignore if no triangle mesh or not enabled */
      TriangleMesh* mesh = scene->getTriangleMeshSafe(objectID);
      if (mesh == NULL || !mesh->isEnabled() || mesh->numTimeSteps != 1) 
        return;
      
      BVH4*    object  = objects [objectID]; assert(object);
      Builder* builder = builders[objectID]; assert(builder);
            
      /* build object if it got modified */
      if (mesh->isModified()) {
        builder->build(threadIndex,threadCount);
        mesh->state = Geometry::ENABLED;
      }
      
      /* create build primitive */
      if (!object->bounds.empty())
	refs[nextRef++] = BuildRef(object->bounds,object->root);
    }
 /*! Sequentially "opens" build references: repeatedly removes the
  *  largest reference (according to the ordering of BuildRef used by
  *  the std heap functions) and replaces it by references to its
  *  non-empty children.
  *
  *  Opening stops when
  *   - fewer than 3 free slots remain below the target size
  *     N = max(2*refs.size(), MIN_OPEN_SIZE) (opening one node removes
  *     one reference and adds up to four), or
  *   - the largest reference is a leaf, or
  *   - no references are left.
  *
  *  On return refs is still organized as a heap. */
 void BVH4BuilderTopLevel::open_sequential()
 {
   const size_t N = max(2*refs.size(),size_t(MIN_OPEN_SIZE));
   refs.reserve(N);
   
   std::make_heap(refs.begin(),refs.end());
   while (refs.size()+3 <= N)
   {
     /* std::pop_heap and back() require a non-empty range; refs can be
        empty initially (no build primitives) or after opening a node
        that has only empty children */
     if (refs.size() == 0) break;

     /* move the largest reference to the back of the array */
     std::pop_heap (refs.begin(),refs.end()); 
     BVH4::NodeRef ref = refs.back().node;

     /* a leaf cannot be opened; it stays in refs, where it is again
        the last element of a valid heap */
     if (ref.isLeaf()) break;
     refs.pop_back();    
     
     /* replace the node by its non-empty children */
     BVH4::Node* node = ref.node();
     for (size_t i=0; i<4; i++) {
       if (node->child(i) == BVH4::emptyNode) continue;
       refs.push_back(BuildRef(node->bounds(i),node->child(i)));
       std::push_heap (refs.begin(),refs.end()); 
     }
   }
 }
    /*! Parallel variant of node opening, executed by every thread.
     *
     *  Each thread copies its share of the first global_dest entries of
     *  refs into its (at least as large) share of refs1 and maintains a
     *  private max heap there. In barrier-synchronized rounds all threads
     *  agree on the largest lower.w value (called "volume" below) and each
     *  thread replaces its inner nodes whose volume is at least half of
     *  that maximum by their non-empty children, as long as its share of
     *  refs1 has at least 3 free slots. The rounds end once the agreed
     *  maximum is 0. Finally every thread copies its entries back into
     *  refs at an offset reserved through global_dest, so global_dest
     *  accumulates the number of entries written back.
     *
     *  The work split uses threadIndex/threadCount only; taskIndex,
     *  taskCount and event are not used in this function.
     *
     *  NOTE(review): the heap functions rely on BuildRef's ordering, which
     *  presumably compares lower.w -- confirm against BuildRef. */
    void BVH4BuilderTopLevel::task_open_parallel(size_t threadIndex, size_t threadCount, size_t taskIndex, size_t taskCount, TaskScheduler::Event* event)
    {
      /* N: number of input entries in refs, M: capacity available in refs1 */
      size_t N = global_dest;
      size_t M = refs1.size();
      /* [start0,end0) is this thread's range in refs, [start1,end1) its range in refs1 */
      const size_t start0 = (threadIndex+0)*N/threadCount;
      const size_t end0   = (threadIndex+1)*N/threadCount;
      const size_t start1 = (threadIndex+0)*M/threadCount;
      const size_t end1   = (threadIndex+1)*M/threadCount;
      /* the destination range has to be able to hold all source entries */
      assert(end1-start1 >= end0-start0);
      BuildRef* prefs1 = &refs1[0];
      
      /* copy from refs buffer to refs1 buffer */
      for (size_t i=start0, j=start1; i<end0; i++, j++) 
        refs1[j] = refs[i];
      
      /* create max heap in our set of items */
      /* [start,end) is the occupied part of this thread's range in refs1 */
      size_t start = start1;
      size_t end   = start1+end0-start0;
      std::make_heap(&prefs1[start],&prefs1[end]);
      float max_volume = 0.0f;
      
      /* every thread executes the same number of rounds: the exit test
         below uses a value that is identical for all threads, so all of
         them reach each barrier */
      while (true) 
      {
        /* reset the shared maximum; the surrounding barriers separate the
           reset from the other threads' accesses to global_max_volume */
        barrier.wait(threadIndex,threadCount);
        if (threadIndex == 0) global_max_volume = 0.0f;
        barrier.wait(threadIndex,threadCount);
        
        /* parallel calculation of maximal volume */
        /* threads with fewer than 3 free slots cannot open a node anymore
           and therefore contribute 0 */
        max_volume = 0.0f;
        if (end+3 <= end1)
          for (size_t i=start; i<end; i++)
            max_volume = max(max_volume,prefs1[i].lower.w);
        
        /* presumably raises global_max_volume to max_volume atomically --
           confirm against atomic_max_f32 */
        atomic_max_f32(&global_max_volume,max_volume);
        
        /* wait for all contributions, read the result, and make sure all
           threads have read it before the next round resets it */
        barrier.wait(threadIndex,threadCount);
        max_volume = global_max_volume;
        barrier.wait(threadIndex,threadCount);
        
        /* if maximal volume is 0, all threads are finished */
        if (max_volume == 0.0f) break;
                
        /* open all nodes that are considered large in this iteration */
        /* opening removes one entry and adds up to four, hence 3 free slots are required */
        while (end+3 <= end1)
        {
          /* pop_heap needs a non-empty range */
		  if (end-start == 0) break;
          /* move the largest entry to position end-1 */
          std::pop_heap(&prefs1[start],&prefs1[end]); 
          BVH4::NodeRef ref = prefs1[end-1].node;
          float vol = prefs1[end-1].lower.w;
          /* leaves cannot be opened and entries below half of the global
             maximum are left for a later round: put the entry back into
             the heap and end this round */
		  if (ref.isLeaf() || vol < 0.5f*max_volume) {
			std::push_heap(&prefs1[start],&prefs1[end]); 
			break;
		  }
          /* drop the entry from the heap range */
          end--;
          
          /* insert the non-empty children in its place */
          BVH4::Node* node = ref.node();
          for (size_t i=0; i<4; i++) {
            if (node->child(i) == BVH4::emptyNode) continue;
            prefs1[end++] = BuildRef(node->bounds(i),node->child(i));
            std::push_heap(&prefs1[start],&prefs1[end]); 
          }
        }
      }
      
      /* global_dest is reused as the shared write position for the copy back */
      if (threadIndex == 0) global_dest = 0;
      barrier.wait(threadIndex,threadCount);
      
      /* copy again back to refs array */
      /* dest is used as this thread's first output index; this assumes
         atomic_add returns the value before the addition -- confirm */
      size_t dest = atomic_add(&global_dest,end-start);
      for (size_t i=start, j=dest; i<end; i++, j++) 
        refs[j] = refs1[i];
    }