Ejemplo n.º 1
0
void Mesh::calculateVolume()
{
    const float dl = mAABB[0].getXSize()/VDIV;
    if (dl<0.00001) return;

    unsigned long int voxelInside=0, voxelTotal=0, xi=0;

    for (float x= mAABB[0].min.x+dl/2; x< mAABB[0].max.x; x+=dl) {
        printf("[%c] [%-2d%%]", "|/-\\"[xi++%4], (int)(100*((x-mAABB[0].min.x)/mAABB[0].getXSize())));fflush(stdout);
        for (float y= mAABB[0].min.y+dl/2; y< mAABB[0].max.y; y+=dl) {
            for (float z= mAABB[0].min.z+dl/2; z< mAABB[0].max.z; z+=dl)
            {
                /* Construct ray */
                Point ray0(x,y,z);
                Point rayFar(x*20,y*20,z*20);
                Line ray (ray0, rayFar);

                /* Count intersecting triangles */
                set<int>alreadyIntersected;
                alreadyIntersected.clear();
                list<int>::const_iterator ti;
                int bvl = BVL;
                for (int bi=BVL_SIZE(bvl-1); bi<BVL_SIZE(bvl); ++bi) {
                    if (!Geom::intersects(mAABB[bi], ray)) continue;
                    for (ti = mAABBTriangles[bi].begin(); ti!=mAABBTriangles[bi].end(); ++ti) {
                        Triangle &t = mTriangles[*ti];
                        if ((Geom::mkcode(ray.start, t.getBox()) & Geom::mkcode(ray.end, t.getBox()))) continue;
                        if ( Geom::intersects(t, ray)) {
			    alreadyIntersected.insert(*ti);
                        }
                    }
                }
                
                ++voxelTotal;
                
		/* For odd number of triangles count this voxel to the total volume */
                if (alreadyIntersected.size()%2 == 1){
                    mVoxels.push_back( Box (Point(x-dl/2.2, y-dl/2.2, z-dl/2.2), Point(x+dl/2.2, y+dl/2.2, z+dl/2.2)));
                    ++voxelInside;
                }
            }
        }
        printf ("\r");
    }
    printf("             \r");

    /* Calculate the coverage for every AABB level */
    float objVol = (mAABB[0].getVolume()*voxelInside)/voxelTotal;
    float bVol, sVol;

    for (int bvlevel=0; bvlevel<=BVL; ++bvlevel) {
        bVol=0;
        for (int bi=BVL_SIZE(bvlevel-1); bi< BVL_SIZE(bvlevel); ++bi)
            bVol += mAABB[bi].getVolume();
	
        AABBCover[bvlevel] = objVol/bVol;
    }

    int voxelCount[BVL+1];
    for (int bvlevel=1; bvlevel<= BVL; bvlevel++) 
      voxelCount[bvlevel]=0;
    
    /* Calculate the coverage for every Sphere level */
    Box sB = mSphere[0].getBox();
    float r = mSphere[0].rad;
    float sBvol = 8.0 *r*r*r;
    voxelTotal = 0;
    
    for (float x= sB.min.x+dl/2; x< sB.max.x; x+=dl) {
	for (float y= sB.min.y+dl/2; y< sB.max.y; y+=dl) {
	    for (float z= sB.min.z+dl/2; z< sB.max.z; z+=dl) {
		++voxelTotal;
		Point v(x,y,z);
		for (int bvlevel=1; bvlevel<=BVL; ++bvlevel) {
		    for (int bi=BVL_SIZE(bvlevel-1); bi< BVL_SIZE(bvlevel); ++bi) {
			if (mSphere[bi].contains(v)) {
			    ++voxelCount[bvlevel];
			    break;
			}
		    }
		}
	    } 
	}  
    }
    
    sphereCover[0] = objVol / mSphere[0].getVolume();
    for (int bvlevel=1; bvlevel<= BVL; bvlevel++)
	sphereCover[bvlevel] = objVol / (sBvol * ((float)voxelCount[bvlevel]/voxelTotal));

}
Ejemplo n.º 2
0
 /*! Traverses the BVH4MB with a single ray and intersects the ray with the
  *  triangles of every leaf that is reached.
  *
  *  The traversal keeps the top of the stack "pre-popped" in popCur/popDist:
  *  whenever the current subtree is finished the next candidate has already
  *  been loaded from stackPtr[-1]. That load also happens when the logical
  *  stack is empty, so one initialised sentinel slot is kept below the stack
  *  bottom; without it stackPtr[-1] would address memory in front of the
  *  array. The sentinel value is never traversed because the loop terminates
  *  as soon as stackPtr reaches the stack bottom.
  *
  *  \param bvh  hierarchy to traverse; bvh->root is the entry node and
  *              bvh->geometry is forwarded to the triangle intersector
  *  \param ray  ray to intersect; ray.tfar is re-read after every leaf,
  *              presumably because TriangleIntersector::intersect shortens it
  *              on a hit (confirm against the intersector)
  */
 void BVH4MBIntersector1<TriangleIntersector>::intersect(const BVH4MB* bvh, Ray& ray)
 {
   AVX_ZERO_UPPER();
   STAT3(normal.travs,1,1,1);
   
   /*! stack state */
   Base* popCur  = bvh->root;              //!< pre-popped top node from the stack
   float popDist = neg_inf;                //!< pre-popped distance of top node from the stack
   StackItem stackStorage[2+3*BVH4MB::maxDepth];  //!< sentinel slot followed by the stack of nodes that still need to get traversed
   StackItem* const stack = stackStorage+1;       //!< logical stack bottom; stack[-1] is the sentinel
   StackItem* stackPtr = stack+1;          //!< current stack pointer
   
   /*! initialise the sentinel so that the pre-pop on an empty stack reads defined data */
   stackStorage[0].ptr  = popCur;
   stackStorage[0].dist = popDist;
   
   /*! offsets to select the side that becomes the lower or upper bound;
    *  the XOR with 32 flips between the two sides and relies on
    *  2*sizeof(ssef) being 32 */
   const size_t nearX = ray.dir.x >= 0 ? 0*2*sizeof(ssef) : 1*2*sizeof(ssef);
   const size_t nearY = ray.dir.y >= 0 ? 2*2*sizeof(ssef) : 3*2*sizeof(ssef);
   const size_t nearZ = ray.dir.z >= 0 ? 4*2*sizeof(ssef) : 5*2*sizeof(ssef);
   const size_t farX  = nearX ^ 32;
   const size_t farY  = nearY ^ 32;
   const size_t farZ  = nearZ ^ 32;
   
   /*! load the ray into SIMD registers */
   const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
   const Vec3fa ray_rdir = rcp_safe(ray.dir);
   const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z);
   const ssef rayNear(ray.tnear);
   ssef rayFar(ray.tfar);
   
   while (true)
   {
     /*! pop next node */
     if (unlikely(stackPtr == stack)) break;
     stackPtr--;
     Base* cur = popCur;
     
     /*! if popped node is too far, pop next one */
     if (unlikely(popDist > ray.tfar)) {
       popCur  = (Base*)stackPtr[-1].ptr;
       popDist = stackPtr[-1].dist;
       continue;
     }
     
   next:
     
     /*! we mostly go into the inner node case */
     if (likely(cur->isNode()))
     {
       STAT3(normal.trav_nodes,1,1,1);
       
       /*! single ray intersection with 4 boxes; each bound is evaluated as
        *  p[0] + ray.time*p[1] before the slab test */
       const Node* node = cur->node();
       const ssef* pNearX = (const ssef*)((const char*)node+nearX);
       const ssef* pNearY = (const ssef*)((const char*)node+nearY);
       const ssef* pNearZ = (const ssef*)((const char*)node+nearZ);
       const ssef tNearX = (norg.x + ssef(pNearX[0]) + ray.time*pNearX[1]) * rdir.x;
       const ssef tNearY = (norg.y + ssef(pNearY[0]) + ray.time*pNearY[1]) * rdir.y;
       const ssef tNearZ = (norg.z + ssef(pNearZ[0]) + ray.time*pNearZ[1]) * rdir.z;
       const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear);
       const ssef* pFarX = (const ssef*)((const char*)node+farX);
       const ssef* pFarY = (const ssef*)((const char*)node+farY);
       const ssef* pFarZ = (const ssef*)((const char*)node+farZ);
       const ssef tFarX = (norg.x + ssef(pFarX[0]) + ray.time*pFarX[1]) * rdir.x;
       const ssef tFarY = (norg.y + ssef(pFarY[0]) + ray.time*pFarY[1]) * rdir.y;
       const ssef tFarZ = (norg.z + ssef(pFarZ[0]) + ray.time*pFarZ[1]) * rdir.z;
       popCur = (Base*) stackPtr[-1].ptr;      //!< pre-pop of topmost stack item
       popDist = stackPtr[-1].dist;            //!< pre-pop of distance of topmost stack item
       const ssef tFar = min(tFarX,tFarY,tFarZ,rayFar);
       size_t _hit = movemask(tNear <= tFar);
       
       /*! if no child is hit, pop next node */
       if (unlikely(_hit == 0))
         continue;
       
       /*! one child is hit, continue with that child */
       size_t r = __bsf(_hit); _hit = __btc(_hit,r);
       if (likely(_hit == 0)) {
         cur = node->child[r];
         goto next;
       }
       
       /*! two children are hit, push far child, and continue with closer child */
       Base* c0 = node->child[r]; const float d0 = tNear[r];
       r = __bsf(_hit); _hit = __btc(_hit,r);
       Base* c1 = node->child[r]; const float d1 = tNear[r];
       if (likely(_hit == 0)) {
         if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; goto next; }
         else         { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; goto next; }
       }
       
       /*! Here starts the slow path for 3 or 4 hit children. We push
        *  all nodes onto the stack to sort them there. */
       stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
       stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
       
       /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
       r = __bsf(_hit); _hit = __btc(_hit,r);
       Base* c = node->child[r]; float d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
       if (likely(_hit == 0)) {
         sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
         cur = (Base*) stackPtr[-1].ptr; stackPtr--;
         goto next;
       }
       
       /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
       r = __bsf(_hit); _hit = __btc(_hit,r);
       c = node->child[r]; d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
       sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
       cur = (Base*) stackPtr[-1].ptr; stackPtr--;
       goto next;
     }
     
     /*! this is a leaf node */
     else 
     {
       STAT3(normal.trav_leaves,1,1,1);
       size_t num; Triangle* tri = (Triangle*) cur->leaf(num);
       for (size_t i=0; i<num; i++)
         TriangleIntersector::intersect(ray,tri[i],bvh->geometry);
       
       popCur = (Base*) stackPtr[-1].ptr;  //!< pre-pop of topmost stack item
       popDist = stackPtr[-1].dist;        //!< pre-pop of distance of topmost stack item
       rayFar = ray.tfar;                  //!< pick up the far distance as left by the triangle intersector
     }
   }
   AVX_ZERO_UPPER();
 }
    /*! Traverses the BVH4 subtree below \p root with the single ray stored in
     *  lane \p k of the packet \p ray and intersects it with the primitives of
     *  every leaf that is reached.
     *
     *  \param bvh        hierarchy; only bvh->geometry is read here and is
     *                    forwarded to the primitive intersector
     *  \param root       node at which traversal starts
     *  \param k          lane of the packet that is traversed
     *  \param ray        ray packet; ray.tfar[k] is re-read after every leaf
     *  \param ray_org    packet origins, lane k is broadcast
     *  \param ray_dir    packet directions, only the signs of lane k are used
     *  \param ray_rdir   packet reciprocal directions, lane k is broadcast
     *  \param ray_tnear  packet near distances, lane k seeds rayNear
     *  \param ray_tfar   packet far distances, lane k seeds rayFar
     */
    __forceinline void BVH4Intersector4Hybrid<PrimitiveIntersector4>::intersect1(const BVH4* bvh, NodeRef root, size_t k, Ray4& ray, 
                                                                                 const sse3f& ray_org, const sse3f& ray_dir, const sse3f& ray_rdir, 
                                                                                 const ssef& ray_tnear, const ssef& ray_tfar)
    {
      /*! stack state */
      StackItem stack[stackSizeSingle];  //!< stack of nodes 
      StackItem* stackPtr = stack+1;        //!< current stack pointer
      StackItem* stackEnd = stack+stackSizeSingle;
      stack[0].ptr = root;
      stack[0].dist = neg_inf;
      
      /*! offsets to select the side that becomes the lower or upper bound */
      const size_t nearX = ray_dir.x[k] >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef);
      const size_t nearY = ray_dir.y[k] >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef);
      const size_t nearZ = ray_dir.z[k] >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef);
      
      /*! load the ray into SIMD registers; norg is used by the non-AVX2 box
       *  test and org_rdir by the AVX2 one */
      const sse3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]);
      const sse3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]);
      const sse3f norg = -org, org_rdir(org*rdir);
      ssef rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); 
      
      /* pop loop; the label sits on the loop body so that "goto pop" from the
       * inner loop restarts the body with the next stack entry */
      while (true) pop:
      {
        /*! pop next node */
        if (unlikely(stackPtr == stack)) break;
        stackPtr--;
        NodeRef cur = NodeRef(stackPtr->ptr);
        
        /*! if popped node is too far, pop next one */
        if (unlikely(stackPtr->dist > ray.tfar[k]))
          continue;
        
        /* downtraversal loop */
        while (true)
        {
          /*! stop if we found a leaf */
          if (unlikely(cur.isLeaf())) break;
          STAT3(normal.trav_nodes,1,1,1);
          
          /*! single ray intersection with 4 boxes; the XOR with 16 flips
           *  between the two sides and relies on sizeof(ssef) being 16 */
          const Node* node = cur.node();
          const size_t farX  = nearX ^ 16, farY  = nearY ^ 16, farZ  = nearZ ^ 16;
#if defined (__AVX2__)
          /* bound*rdir - org*rdir, presumably as a fused multiply-subtract */
          const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x);
          const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y);
          const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z);
          const ssef tFarX  = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x);
          const ssef tFarY  = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y);
          const ssef tFarZ  = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z);
#else
          const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x;
          const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y;
          const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z;
          const ssef tFarX  = (norg.x + load4f((const char*)node+farX )) * rdir.x;
          const ssef tFarY  = (norg.y + load4f((const char*)node+farY )) * rdir.y;
          const ssef tFarZ  = (norg.z + load4f((const char*)node+farZ )) * rdir.z;
#endif

#if defined(__SSE4_1__)
          /* the comparison yields the lanes that are missed, so the mask is
           * inverted with ^0xf to obtain the lanes that are hit */
          const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear));
          const ssef tFar  = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar ));
          const sseb vmask = cast(tNear) > cast(tFar);
          size_t mask = movemask(vmask)^0xf;
#else
          const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear);
          const ssef tFar  = min(tFarX ,tFarY ,tFarZ ,rayFar);
          const sseb vmask = tNear <= tFar;
          size_t mask = movemask(vmask);
#endif
          
          /*! if no child is hit, pop next node */
          if (unlikely(mask == 0))
            goto pop;
          
          /*! one child is hit, continue with that child;
           *  __bscf presumably returns the lowest set bit index and clears it
           *  in mask (the following mask == 0 tests depend on that) */
          size_t r = __bscf(mask);
          if (likely(mask == 0)) {
            cur = node->child(r);
            assert(cur != BVH4::emptyNode);
            continue;
          }
          
          /*! two children are hit, push far child, and continue with closer child */
          NodeRef c0 = node->child(r); const float d0 = tNear[r];
          r = __bscf(mask);
          NodeRef c1 = node->child(r); const float d1 = tNear[r];
          assert(c0 != BVH4::emptyNode);
          assert(c1 != BVH4::emptyNode);
          if (likely(mask == 0)) {
            assert(stackPtr < stackEnd); 
            if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; }
            else         { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; }
          }
          
          /*! Here starts the slow path for 3 or 4 hit children. We push
           *  all nodes onto the stack to sort them there. */
          assert(stackPtr < stackEnd); 
          stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
          assert(stackPtr < stackEnd); 
          stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
          
          /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
          assert(stackPtr < stackEnd); 
          r = __bscf(mask);
          NodeRef c = node->child(r); float d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
          assert(c != BVH4::emptyNode);
          if (likely(mask == 0)) {
            sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
            cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
            continue;
          }
          
          /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
          assert(stackPtr < stackEnd); 
          r = __bscf(mask);
          c = node->child(r); d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
          assert(c != BVH4::emptyNode);
          sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
          cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
        }
        
        /*! this is a leaf node */
        STAT3(normal.trav_leaves,1,1,1);
        size_t num; Primitive* prim = (Primitive*) cur.leaf(num);
        PrimitiveIntersector4::intersect(ray,k,prim,num,bvh->geometry);
        /* refresh the far bound from the packet; presumably the primitive
         * intersector shortens ray.tfar[k] on a hit */
        rayFar = ray.tfar[k];
      }
    }
Ejemplo n.º 4
0
    /*! Traverses the BVH8i subtree below \p root with the single ray stored in
     *  lane \p k of the packet \p ray and intersects it with the triangles of
     *  every leaf that is reached.
     *
     *  Child distances are stored on the stack and compared as the raw 32-bit
     *  patterns of the float values (see the unsigned int reads of tNear and
     *  the float reinterpretation when popping).
     *  NOTE(review): that ordering only matches float ordering for
     *  non-negative distances - confirm tNear cannot be negative here.
     *
     *  \param bvh        hierarchy; nodePtr(), triPtr() and geometry are read
     *  \param root       node at which traversal starts
     *  \param k          lane of the packet that is traversed
     *  \param ray        ray packet; ray.tfar[k] is re-read after every leaf
     *  \param ray_org    packet origins, lane k is broadcast
     *  \param ray_dir    packet directions (not read in this function)
     *  \param ray_rdir   packet reciprocal directions, lane k is broadcast
     *  \param ray_tnear  packet near distances, lane k seeds rayNear
     *  \param ray_tfar   packet far distances, lane k seeds rayFar
     *  \param nearXYZ    per-lane byte offsets of the near bounds inside a node
     */
    __forceinline void BVH8iIntersector8Hybrid<TriangleIntersector8>::intersect1(const BVH8i* bvh, NodeRef root, const size_t k, Ray8& ray,const avx3f &ray_org, const avx3f &ray_dir, const avx3f &ray_rdir, const avxf &ray_tnear, const avxf &ray_tfar, const avx3i& nearXYZ)
    {
      /*! stack state */
      StackItemInt64 stack[stackSizeSingle];  //!< stack of nodes 
      StackItemInt64* stackPtr = stack+1;        //!< current stack pointer
      StackItemInt64* stackEnd = stack+stackSizeSingle;
      stack[0].ptr = root;
      stack[0].dist = neg_inf;
      
      /*! offsets to select the side that becomes the lower or upper bound */
      const size_t nearX = nearXYZ.x[k];
      const size_t nearY = nearXYZ.y[k];
      const size_t nearZ = nearXYZ.z[k];

      /*! load the ray into SIMD registers */
      const avx3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]);
      const avx3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]);
      const avx3f org_rdir(org*rdir);
      avxf rayNear(ray_tnear[k]), rayFar(ray_tfar[k]);
     
      const Node     * __restrict__ nodes = (Node    *)bvh->nodePtr();
      const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr();
 
      /* pop loop; the label sits on the loop body so that "goto pop" from the
       * inner loop restarts the body with the next stack entry */
      while (true) pop:
      {
        /*! pop next node */
        if (unlikely(stackPtr == stack)) break;
        stackPtr--;
        NodeRef cur = NodeRef(stackPtr->ptr);
        
        /*! if popped node is too far, pop next one */
        if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
          continue;
        
        /* downtraversal loop */
        while (true)
        {
          /*! stop if we found a leaf */
          if (unlikely(cur.isLeaf())) break;
          STAT3(normal.trav_nodes,1,1,1);
          
          /*! single ray intersection with 8 boxes */
          const Node* node = (Node*)cur.node(nodes);
          const size_t farX  = nearX ^ sizeof(avxf), farY  = nearY ^ sizeof(avxf), farZ  = nearZ ^ sizeof(avxf);
#if defined (__AVX2__)
          const avxf tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x);
          const avxf tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y);
          const avxf tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z);
          const avxf tFarX  = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x);
          const avxf tFarY  = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y);
          const avxf tFarZ  = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z);
#else
          const avxf tNearX = (load8f((const char*)node+nearX) - org.x) * rdir.x;
          const avxf tNearY = (load8f((const char*)node+nearY) - org.y) * rdir.y;
          const avxf tNearZ = (load8f((const char*)node+nearZ) - org.z) * rdir.z;
          const avxf tFarX  = (load8f((const char*)node+farX ) - org.x) * rdir.x;
          const avxf tFarY  = (load8f((const char*)node+farY ) - org.y) * rdir.y;
          const avxf tFarZ  = (load8f((const char*)node+farZ ) - org.z) * rdir.z;
#endif

#if defined(__AVX2__)
          /* the comparison yields the lanes that are missed, so the mask is
           * inverted with ^0xff to obtain the lanes that are hit */
          const avxf tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear));
          const avxf tFar  = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar ));
          const avxb vmask = cast(tNear) > cast(tFar);
          unsigned int mask = movemask(vmask)^0xff;
#else
          const avxf tNear = max(tNearX,tNearY,tNearZ,rayNear);
          const avxf tFar  = min(tFarX ,tFarY ,tFarZ ,rayFar);
          const avxb vmask = tNear <= tFar;
          unsigned int mask = movemask(vmask);
#endif
          
          /*! if no child is hit, pop next node */
          if (unlikely(mask == 0))
            goto pop;
          
          /*! one child is hit, continue with that child */
          size_t r = __bscf(mask);
          if (likely(mask == 0)) {
            cur = node->child(r);
            assert(cur != BVH4i::emptyNode);
            continue;
          }
          
          /*! two children are hit, push far child, and continue with closer child */
          NodeRef c0 = node->child(r); const unsigned int d0 = ((unsigned int*)&tNear)[r];
          r = __bscf(mask);
          NodeRef c1 = node->child(r); const unsigned int d1 = ((unsigned int*)&tNear)[r];
          assert(c0 != BVH4i::emptyNode);
          assert(c1 != BVH4i::emptyNode);
          if (likely(mask == 0)) {
            assert(stackPtr < stackEnd); 
            if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; }
            else         { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; }
          }
          
          /*! Here starts the slow path for 3 or more hit children. We push
           *  all nodes onto the stack to sort them there. */
          assert(stackPtr < stackEnd); 
          stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
          assert(stackPtr < stackEnd); 
          stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
          
          /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
          assert(stackPtr < stackEnd); 
          r = __bscf(mask);
          NodeRef c = node->child(r); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
          assert(c != BVH4i::emptyNode);  // check the child just pushed, not c0 again
          if (likely(mask == 0)) {
            sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
            cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
            continue;
          }
          
          /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
          assert(stackPtr < stackEnd); 
          r = __bscf(mask);
          c = node->child(r); d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
          assert(c != BVH4i::emptyNode);
          if (likely(mask == 0)) {
            sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
            cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
            continue;
          }

          /*! more than four children are hit: push the remaining ones without
           *  sorting and continue with the one pushed last */
          while(1)
          {
            assert(stackPtr < stackEnd);
            r = __bscf(mask);
            c = node->child(r); d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
            if (unlikely(mask == 0)) break;
          }
          cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
          
        }
        
        /*! this is a leaf node */
        STAT3(normal.trav_leaves,1,1,1);
        size_t num; Triangle* prim = (Triangle*) cur.leaf(accel,num);
        TriangleIntersector8::intersect(ray,k,prim,num,bvh->geometry);
        /* refresh the far bound from the packet; presumably the triangle
         * intersector shortens ray.tfar[k] on a hit */
        rayFar = ray.tfar[k];
      }
    }
Ejemplo n.º 5
0
  /*! Traverses the BVH4i owned by \p This with a single ray and intersects
   *  the ray with the triangles of every leaf that is reached.
   *
   *  \param This  intersector instance; only This->bvh is read
   *  \param ray   ray to intersect; ray.tfar is re-read after every leaf,
   *               presumably because TriangleIntersector::intersect shortens
   *               it on a hit (confirm against the intersector)
   *
   *  NOTE(review): pushes are not bounds-checked here; the stack size of
   *  1+3*BVH4i::maxDepth presumably covers the worst case of three net pushes
   *  per visited level - confirm.
   */
  void BVH4iIntersector1<TriangleIntersector>::intersect(const BVH4iIntersector1* This, Ray& ray)
  {
    AVX_ZERO_UPPER();
    STAT3(normal.travs,1,1,1);
    
    /*! stack state */
    const BVH4i* bvh = This->bvh;
    StackItem stack[1+3*BVH4i::maxDepth];  //!< stack of nodes 
    StackItem* stackPtr = stack+1;        //!< current stack pointer
    stack[0].ptr  = bvh->root;
    stack[0].dist = neg_inf;

    /*! offsets to select the side that becomes the lower or upper bound */
    const size_t nearX = ray.dir.x >= 0.0f ? 0*sizeof(ssef_m) : 1*sizeof(ssef_m);
    const size_t nearY = ray.dir.y >= 0.0f ? 2*sizeof(ssef_m) : 3*sizeof(ssef_m);
    const size_t nearZ = ray.dir.z >= 0.0f ? 4*sizeof(ssef_m) : 5*sizeof(ssef_m);
   
    /*! load the ray into SIMD registers; norg is used by the non-AVX2 box
     *  test and org_rdir by the AVX2 one */
    const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
    const Vector3f ray_rdir = rcp_safe(ray.dir);
    const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z);
    const Vector3f ray_org_rdir = ray.org*ray_rdir;
    const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
    const ssef  rayNear(ray.tnear);
    ssef rayFar(ray.tfar);

    /*! base pointers against which node and leaf references are resolved */
    const void* nodePtr = bvh->nodePtr();
    const void* triPtr  = bvh->triPtr();
     
    /* pop loop; the label sits on the loop body so that "goto pop" from the
     * inner loop restarts the body with the next stack entry */
    while (true) pop:
    {
      /*! pop next node */
      if (unlikely(stackPtr == stack)) break;
      stackPtr--;
      NodeRef cur = NodeRef(stackPtr->ptr);
      
      /*! if popped node is too far, pop next one */
      if (unlikely(stackPtr->dist > ray.tfar))
        continue;

      /* downtraversal loop */
      while (true)
      {
        /*! stop if we found a leaf */
        if (unlikely(cur.isLeaf())) break;
        STAT3(normal.trav_nodes,1,1,1);
    
        /*! single ray intersection with 4 boxes; the XOR with 16 flips
         *  between the two sides and relies on sizeof(ssef_m) being 16.
         *  NOTE(review): the bounds are loaded from nodePtr+(size_t)cur, which
         *  assumes the NodeRef converts to the byte offset of the node that
         *  cur.node(nodePtr) returns - confirm. */
        const Node* node = cur.node(nodePtr);
        const size_t farX  = nearX ^ 16, farY  = nearY ^ 16, farZ  = nearZ ^ 16;
#if defined (__AVX2__)
        const ssef tNearX = msub(ssef((const char*)nodePtr+(size_t)cur+nearX), rdir.x, org_rdir.x);
        const ssef tNearY = msub(ssef((const char*)nodePtr+(size_t)cur+nearY), rdir.y, org_rdir.y);
        const ssef tNearZ = msub(ssef((const char*)nodePtr+(size_t)cur+nearZ), rdir.z, org_rdir.z);
        const ssef tFarX  = msub(ssef((const char*)nodePtr+(size_t)cur+farX ), rdir.x, org_rdir.x);
        const ssef tFarY  = msub(ssef((const char*)nodePtr+(size_t)cur+farY ), rdir.y, org_rdir.y);
        const ssef tFarZ  = msub(ssef((const char*)nodePtr+(size_t)cur+farZ ), rdir.z, org_rdir.z);
#else
        const ssef tNearX = (norg.x + ssef((const char*)nodePtr+(size_t)cur+nearX)) * rdir.x;
        const ssef tNearY = (norg.y + ssef((const char*)nodePtr+(size_t)cur+nearY)) * rdir.y;
        const ssef tNearZ = (norg.z + ssef((const char*)nodePtr+(size_t)cur+nearZ)) * rdir.z;
        const ssef tFarX  = (norg.x + ssef((const char*)nodePtr+(size_t)cur+farX )) * rdir.x;
        const ssef tFarY  = (norg.y + ssef((const char*)nodePtr+(size_t)cur+farY )) * rdir.y;
        const ssef tFarZ  = (norg.z + ssef((const char*)nodePtr+(size_t)cur+farZ )) * rdir.z;
#endif
        const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear);
        const ssef tFar  = min(tFarX ,tFarY ,tFarZ ,rayFar);
        /* one bit per child whose slab interval is non-empty */
    size_t mask = movemask(tNear <= tFar);
        
        /*! if no child is hit, pop next node */
        if (unlikely(mask == 0))
          goto pop;

        /*! one child is hit, continue with that child */
        size_t r = __bsf(mask); mask = __btc(mask,r);
        if (likely(mask == 0)) {
          cur = node->child(r);
          continue;
        }

        /*! two children are hit, push far child, and continue with closer child */
        NodeRef c0 = node->child(r); const float d0 = tNear[r];
        r = __bsf(mask); mask = __btc(mask,r);
        NodeRef c1 = node->child(r); const float d1 = tNear[r];
        if (likely(mask == 0)) {
          if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; }
          else         { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; }
        }

        /*! Here starts the slow path for 3 or 4 hit children. We push
         *  all nodes onto the stack to sort them there. */
        stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
        stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;

        /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
        r = __bsf(mask); mask = __btc(mask,r);
        NodeRef c = node->child(r); float d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
        if (likely(mask == 0)) {
          sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
          cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
          continue;
        }

        /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
        r = __bsf(mask); mask = __btc(mask,r);
        c = node->child(r); d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
        sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
        cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
      }

      /*! this is a leaf node */
      STAT3(normal.trav_leaves,1,1,1);
      size_t num; Triangle* tri = (Triangle*) cur.leaf(triPtr,num);
      for (size_t i=0; i<num; i++)
        TriangleIntersector::intersect(ray,tri[i],bvh->vertices);
      
      /* pick up the far distance as left by the triangle intersector */
      rayFar = ray.tfar;
    }
    AVX_ZERO_UPPER();
  }