void BVH4mbIntersector16Single::occluded(mic_i* valid_i, BVH4mb* bvh, Ray16& ray16)
    {
      /* near and node stack */
      __align(64) NodeRef stack_node[3*BVH4i::maxDepth+1];

      /* setup */
      const mic_m m_valid     = *(mic_i*)valid_i != mic_i(0);
      const mic3f rdir16      = rcp_safe(ray16.dir);
      unsigned int terminated = toInt(!m_valid);
      const mic_f inf         = mic_f(pos_inf);
      const mic_f zero        = mic_f::zero();

      const Node               * __restrict__ nodes = (Node     *)bvh->nodePtr();
      const BVH4mb::Triangle01 * __restrict__ accel = (BVH4mb::Triangle01 *)bvh->triPtr();

      stack_node[0] = BVH4i::invalidNode;

      long rayIndex = -1;
      while((rayIndex = bitscan64(rayIndex,toInt(m_valid))) != BITSCAN_NO_BIT_SET_64)	    
        {
	  stack_node[1] = bvh->root;
	  size_t sindex = 2;

	  const mic_f org_xyz      = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z);
	  const mic_f dir_xyz      = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z);
	  const mic_f rdir_xyz     = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z);
	  const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
	  const mic_f min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]);
	  const mic_f max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]);
	  const mic_f time         = broadcast1to16f(&ray16.time[rayIndex]);

	  const unsigned int leaf_mask = BVH4I_LEAF_MASK;

	  while (1)
	    {
	      NodeRef curNode = stack_node[sindex-1];
	      sindex--;

	      const mic_f one_time = (mic_f::one() - time);
            
	      while (1) 
		{
		  /* test if this is a leaf node */
		  if (unlikely(curNode.isLeaf(leaf_mask))) break;
        
		  const Node* __restrict__ const node = curNode.node(nodes);
		  const float* __restrict const plower = (float*)node->lower;
		  const float* __restrict const pupper = (float*)node->upper;

		  prefetch<PFHINT_L1>((char*)node + 0*64);
		  prefetch<PFHINT_L1>((char*)node + 1*64);
		  prefetch<PFHINT_L1>((char*)node + 2*64);
		  prefetch<PFHINT_L1>((char*)node + 3*64);

		  const BVH4mb::Node* __restrict__ const nodeMB = (BVH4mb::Node*)node;
		  const mic_f lower = one_time  * load16f((float*)nodeMB->lower) + time * load16f((float*)nodeMB->lower_t1);
		  const mic_f upper = one_time  * load16f((float*)nodeMB->upper) + time * load16f((float*)nodeMB->upper_t1);
        
		  /* intersect single ray with 4 bounding boxes */
		  const mic_f tLowerXYZ = lower * rdir_xyz - org_rdir_xyz;
		  const mic_f tUpperXYZ = upper * rdir_xyz - org_rdir_xyz;

		  const mic_f tLower = mask_min(0x7777,min_dist_xyz,tLowerXYZ,tUpperXYZ);
		  const mic_f tUpper = mask_max(0x7777,max_dist_xyz,tLowerXYZ,tUpperXYZ);

		  const Node* __restrict__ const next = curNode.node(nodes);
		  prefetch<PFHINT_L2>((char*)next + 0);
		  prefetch<PFHINT_L2>((char*)next + 64);

		  sindex--;
		  const mic_f tNear = vreduce_max4(tLower);
		  const mic_f tFar  = vreduce_min4(tUpper);  
		  const mic_m hitm = le(0x8888,tNear,tFar);
		  const mic_f tNear_pos = select(hitm,tNear,inf);

		  curNode = stack_node[sindex]; // early pop of next node

		  /* if no child is hit, continue with early popped child */
		  if (unlikely(none(hitm))) continue;
		  sindex++;
        
		  const unsigned long hiti = toInt(hitm);
		  const unsigned long pos_first = bitscan64(hiti);
		  const unsigned long num_hitm = countbits(hiti); 
        
		  /* if a single child is hit, continue with that child */
		  curNode = ((unsigned int *)plower)[pos_first];
		  if (likely(num_hitm == 1)) continue;
        
		  /* if two children are hit, push in correct order */
		  const unsigned long pos_second = bitscan64(pos_first,hiti);
		  if (likely(num_hitm == 2))
		    {
		      const unsigned int dist_first  = ((unsigned int*)&tNear)[pos_first];
		      const unsigned int dist_second = ((unsigned int*)&tNear)[pos_second];
		      const unsigned int node_first  = curNode;
		      const unsigned int node_second = ((unsigned int*)plower)[pos_second];
          
		      if (dist_first <= dist_second)
			{
			  stack_node[sindex] = node_second;
			  sindex++;
			  assert(sindex < 3*BVH4i::maxDepth+1);
			  continue;
			}
		      else
			{
			  stack_node[sindex] = curNode;
			  curNode = node_second;
			  sindex++;
			  assert(sindex < 3*BVH4i::maxDepth+1);
			  continue;
			}
		    }
        
		  /* continue with closest child and push all others */
		  const mic_f min_dist = set_min_lanes(tNear_pos);
		  const unsigned int old_sindex = sindex;
		  sindex += countbits(hiti) - 1;
		  assert(sindex < 3*BVH4i::maxDepth+1);
        
		  const mic_m closest_child = eq(hitm,min_dist,tNear);
		  const unsigned long closest_child_pos = bitscan64(closest_child);
		  const mic_m m_pos = andn(hitm,andn(closest_child,(mic_m)((unsigned int)closest_child - 1)));
		  const mic_i plower_node = load16i((int*)plower);
		  curNode = ((unsigned int*)plower)[closest_child_pos];
		  compactustore16i(m_pos,&stack_node[old_sindex],plower_node);
		}
	  
	    

	      /* return if stack is empty */
	      if (unlikely(curNode == BVH4i::invalidNode)) break;


	      /* intersect one ray against four triangles */

	      //////////////////////////////////////////////////////////////////////////////////////////////////

	      const BVH4mb::Triangle01* tptr  = (BVH4mb::Triangle01*) curNode.leaf(accel);

	      prefetch<PFHINT_L1>((mic_f*)tptr +  0); 
	      prefetch<PFHINT_L1>((mic_f*)tptr +  1); 
	      prefetch<PFHINT_L1>((mic_f*)tptr +  2); 
	      prefetch<PFHINT_L1>((mic_f*)tptr +  3); 

	      const mic_i and_mask = broadcast4to16i(zlc4);
	      
	      const mic_f v0_t0 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t0.v0,
						(float*)&tptr[1].t0.v0,
						(float*)&tptr[2].t0.v0,
						(float*)&tptr[3].t0.v0);
	      
	      const mic_f v1_t0 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t0.v1,
						(float*)&tptr[1].t0.v1,
						(float*)&tptr[2].t0.v1,
						(float*)&tptr[3].t0.v1);
	      
	      const mic_f v2_t0 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t0.v2,
						(float*)&tptr[1].t0.v2,
						(float*)&tptr[2].t0.v2,
						(float*)&tptr[3].t0.v2);


	      prefetch<PFHINT_L2>((mic_f*)tptr +  4); 
	      prefetch<PFHINT_L2>((mic_f*)tptr +  5); 
	      prefetch<PFHINT_L2>((mic_f*)tptr +  6); 
	      prefetch<PFHINT_L2>((mic_f*)tptr +  7); 

	      const mic_f v0_t1 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t1.v0,
						(float*)&tptr[1].t1.v0,
						(float*)&tptr[2].t1.v0,
						(float*)&tptr[3].t1.v0);
	      
	      const mic_f v1_t1 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t1.v1,
						(float*)&tptr[1].t1.v1,
						(float*)&tptr[2].t1.v1,
						(float*)&tptr[3].t1.v1);
	      
	      const mic_f v2_t1 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t1.v2,
						(float*)&tptr[1].t1.v2,
						(float*)&tptr[2].t1.v2,
						(float*)&tptr[3].t1.v2);

	      const mic_f v0 = v0_t0 * one_time + time * v0_t1;
	      const mic_f v1 = v1_t0 * one_time + time * v1_t1;
	      const mic_f v2 = v2_t0 * one_time + time * v2_t1;

	      const mic_f e1 = v1 - v0;
	      const mic_f e2 = v0 - v2;	     
	      const mic_f normal = lcross_zxy(e1,e2);
	      const mic_f org = v0 - org_xyz;
	      const mic_f odzxy = msubr231(org * swizzle(dir_xyz,_MM_SWIZ_REG_DACB), dir_xyz, swizzle(org,_MM_SWIZ_REG_DACB));
	      const mic_f den = ldot3_zxy(dir_xyz,normal);	      
	      const mic_f rcp_den = rcp(den);
	      const mic_f uu = ldot3_zxy(e2,odzxy); 
	      const mic_f vv = ldot3_zxy(e1,odzxy); 
	      const mic_f u = uu * rcp_den;
	      const mic_f v = vv * rcp_den;

#if defined(__BACKFACE_CULLING__)
	      const mic_m m_init = (mic_m)0x1111 & (den > zero);
#else
	      const mic_m m_init = 0x1111;
#endif

	      const mic_m valid_u = ge((mic_m)m_init,u,zero);
	      const mic_m valid_v = ge(valid_u,v,zero);
	      const mic_m m_aperture = le(valid_v,u+v,mic_f::one()); 

	      const mic_f nom = ldot3_zxy(org,normal);
	      const mic_f t = rcp_den*nom;
	      if (unlikely(none(m_aperture))) continue;

	      mic_m m_final  = lt(lt(m_aperture,min_dist_xyz,t),t,max_dist_xyz);

#if defined(__USE_RAY_MASK__)
	      const mic_i rayMask(ray16.mask[rayIndex]);
	      const mic_i triMask = swDDDD(gather16i_4i_align(&tptr[0].t0.v2,&tptr[1].t0.v2,&tptr[2].t0.v2,&tptr[3].t0.v2));
	      const mic_m m_ray_mask = (rayMask & triMask) != mic_i::zero();
	      m_final &= m_ray_mask;	      
#endif

	      if (unlikely(any(m_final)))
		{
		  terminated |= mic_m::shift1[rayIndex];
		  break;
		}
	      //////////////////////////////////////////////////////////////////////////////////////////////////

	    }


	  if (unlikely(all(toMask(terminated)))) break;
	}


      store16i(m_valid & toMask(terminated),&ray16.geomID,0);
    }
Ejemplo n.º 2
0
    void BVH4iIntersector16Chunk<LeafIntersector,ENABLE_COMPRESSED_BVH4I_NODES>::occluded(mic_i* valid_i, BVH4i* bvh, Ray16& ray)
    {
      /* allocate stack */
      __aligned(64) mic_f    stack_dist[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1];

      /* load ray */
      const mic_m valid = *(mic_i*)valid_i != mic_i(0);
      mic_m m_terminated = !valid;
      const mic3f rdir = rcp_safe(ray.dir);
      const mic3f org_rdir = ray.org * rdir;
      mic_f ray_tnear = select(valid,ray.tnear,pos_inf);
      mic_f ray_tfar  = select(valid,ray.tfar ,neg_inf);
      const mic_f inf = mic_f(pos_inf);
      
      /* push root node */
      stack_node[0] = BVH4i::invalidNode;
      stack_dist[0] = inf;
      stack_node[1] = bvh->root;
      stack_dist[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      mic_f*   __restrict__ sptr_dist = stack_dist + 2;
      
      const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
      const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr();

      const mic3f org = ray.org;
      const mic3f dir = ray.dir;

      while (1)
      {
	const mic_m m_active = !m_terminated;

        /* pop next node from stack */
        NodeRef curNode = *(sptr_node-1);
        mic_f curDist   = *(sptr_dist-1);
        sptr_node--;
        sptr_dist--;
	const mic_m m_stackDist = gt(m_active,ray_tfar,curDist);

	/* stack emppty ? */
        if (unlikely(curNode == BVH4i::invalidNode))  break;
        
        /* cull node if behind closest hit point */

        if (unlikely(none(m_stackDist))) { continue; }

	const unsigned int leaf_mask = BVH4I_LEAF_MASK;

	traverse_chunk_occluded<ENABLE_COMPRESSED_BVH4I_NODES>(curNode,
							       curDist,
							       rdir,
							       org_rdir,
							       ray_tnear,
							       ray_tfar,
							       m_active,
							       sptr_node,
							       sptr_dist,
							       nodes,
							       leaf_mask);            		    	
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4i::invalidNode)) break;
        
        /* intersect leaf */
        mic_m m_valid_leaf = gt(m_active,ray_tfar,curDist);
        STAT3(shadow.trav_leaves,1,popcnt(m_valid_leaf),16);

	LeafIntersector::occluded16(curNode,m_valid_leaf,dir,org,ray,m_terminated,accel,(Scene*)bvh->geometry);

        if (unlikely(all(m_terminated))) break;
        ray_tfar = select(m_terminated,neg_inf,ray_tfar);
      }
      store16i(valid & m_terminated,&ray.geomID,0);
    }
Ejemplo n.º 3
0
    void BVH8Intersector16Chunk<PrimitiveIntersector16>::occluded(bool16* valid_i, BVH8* bvh, Ray16& ray)
    {
#if defined(__AVX512__)
      
      /* load ray */
      const bool16 valid = *valid_i;
      bool16 terminated = !valid;
      const Vec3f16 rdir = rcp_safe(ray.dir);
      const Vec3f16 org_rdir = ray.org * rdir;
      float16 ray_tnear = select(valid,ray.tnear,pos_inf);
      float16 ray_tfar  = select(valid,ray.tfar ,neg_inf);
      const float16 inf = float16(pos_inf);
      Precalculations pre(valid,ray);

      /* allocate stack and push root node */
      float16    stack_near[3*BVH8::maxDepth+1];
      NodeRef stack_node[3*BVH8::maxDepth+1];
      stack_node[0] = BVH8::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      float16*    __restrict__ sptr_near = stack_near + 2;
      
      while (1)
      {
        /* pop next node from stack */
        sptr_node--;
        sptr_near--;
        NodeRef cur = *sptr_node;
        if (unlikely(cur == BVH8::invalidNode)) 
          break;
        
        /* cull node if behind closest hit point */
        float16 curDist = *sptr_near;
        if (unlikely(none(ray_tfar > curDist))) 
          continue;
        
        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(cur.isLeaf()))
            break;
          
          const bool16 valid_node = ray_tfar > curDist;
          STAT3(shadow.trav_nodes,1,popcnt(valid_node),8);
          const Node* __restrict__ const node = (Node*)cur.node();
          
          /* pop of next node */
          sptr_node--;
          sptr_near--;
          cur = *sptr_node; // FIXME: this trick creates issues with stack depth
          curDist = *sptr_near;
          
          for (unsigned i=0; i<BVH8::N; i++)
          {
            const NodeRef child = node->children[i];
            if (unlikely(child == BVH8::emptyNode)) break;
            
            const float16 lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
            const float16 lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
            const float16 lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
            const float16 lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
            const float16 lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
            const float16 lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
            const float16 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const float16 lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const bool16 lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);      
            
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              const float16 childDist = select(lhit,lnearP,inf);
              sptr_node++;
              sptr_near++;
              
              /* push cur node onto stack and continue with hit child */
              if (any(childDist < curDist))
              {
                *(sptr_node-1) = cur;
                *(sptr_near-1) = curDist; 
                curDist = childDist;
                cur = child;
              }
              
              /* push hit child onto stack*/
              else {
                *(sptr_node-1) = child;
                *(sptr_near-1) = childDist; 
              }
              assert(sptr_node - stack_node < BVH8::maxDepth);
            }	      
          }
        }
        
        /* return if stack is empty */
        if (unlikely(cur == BVH8::invalidNode)) 
          break;
        
        /* intersect leaf */
	assert(cur != BVH8::emptyNode);
        const bool16 valid_leaf = ray_tfar > curDist;
        STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8);
        size_t items; const Triangle* tri  = (Triangle*) cur.leaf(items);
        terminated |= PrimitiveIntersector16::occluded(!terminated,pre,ray,tri,items,bvh->scene);
        if (all(terminated)) break;
        ray_tfar = select(terminated,neg_inf,ray_tfar);
      }
      store16i(valid & terminated,&ray.geomID,0);
      AVX_ZERO_UPPER();
#endif      
    }