Exemple #1
0
/*
 * Run op->fnc[act] over addresses going from 'start' towards 'end' for one
 * level of the npg_op_t chain.
 *
 *  start, end : bounds of the range being processed
 *  upper      : additional bound; the stepping loop stops as soon as the
 *               cursor reaches min(__align(end, op->sz), upper)
 *  attr       : forwarded unchanged to every op->fnc[act] call
 *  op         : current level (fields used: sz, shf, fnc[], nxt)
 *  act        : index of the handler inside op->fnc[]
 *
 * Returns the cursor value at which this level stopped.
 */
static inline offset_t __npg_bk_op(offset_t start, offset_t end,
                                   offset_t upper, uint64_t attr,
                                   npg_op_t *op, uint8_t act)
{
   offset_t cur;

   if(!op->nxt)
   {
      /* last level of the chain: begin at start aligned on this level's size */
      cur = __align(start, op->sz);
   }
   else
   {
      offset_t next_bound = __align_next(start, op->sz);
      bool_t   other_tbl  = (pg_abs_idx(start, op->shf) != pg_abs_idx(end, op->shf));

      if(other_tbl && __aligned(start, op->sz))
      {
         /* start is aligned for this level and start/end have different
          * indexes here: handle the first entry at this level directly */
         op->fnc[act](start, attr);
         cur = next_bound;
      }
      else
      {
         /* otherwise let the next level process the head of the range,
          * bounded by this level's next aligned address */
         cur = __npg_bk_op(start, end, next_bound, attr, op->nxt, act);
      }
   }

   /* step through the remaining entries of this level */
   for( ; cur < min(__align(end, op->sz), upper) ; cur += op->sz)
      op->fnc[act](cur, attr);

   return cur;
}
Exemple #2
0
/*
 * Aligned allocation size, header included.
 *
 * Adds sizeof(struct xheap_header) to `bytes` and passes the sum through
 * __align() with an alignment argument chosen by the size of the request:
 *   bytes < 2^(alignment_unit + SMALL_LIMIT)  -> alignment_unit
 *   bytes < 2^(alignment_unit + MEDIUM_LIMIT) -> alignment_unit + MEDIUM_AL_UNIT
 *   otherwise                                 -> alignment_unit + LARGE_AL_UNIT
 *
 * The thresholds are computed in uint64_t: shifting a plain int 1 is
 * undefined for shift counts >= the width of int, and at width - 1 it gives
 * a negative value that turns into a huge bound when compared with `bytes`.
 */
static inline uint64_t __get_alloc_bytes(struct xheap *xheap, uint64_t bytes)
{
	const uint64_t one = 1;
	const uint64_t total = bytes + sizeof(struct xheap_header);

	if (bytes < (one << (xheap->alignment_unit + SMALL_LIMIT)))
		return __align(total, xheap->alignment_unit);
	else if (bytes < (one << (xheap->alignment_unit + MEDIUM_LIMIT)))
		return __align(total, xheap->alignment_unit + MEDIUM_AL_UNIT);
	else
		return __align(total, xheap->alignment_unit + LARGE_AL_UNIT);
}
 void BVH4BuilderTopLevel::recurseSAH(size_t depth, BuildRecord& task, const size_t mode, const size_t threadID, const size_t numThreads)
 {
   /* return leaf node */
   assert(task.end-task.begin > 0);
   if (unlikely(task.end-task.begin == 1)) {
     *(NodeRef*)task.parentNode = refs[task.begin].node;
     return;
   }
   
   /* create leaf node */
   if (unlikely(task.depth >= BVH4::maxBuildDepth)) {
     createLeaf(task,threadID,numThreads);
     return;
   }
   
   /*! initialize task list */
   BuildRecord childTasks[4];
   childTasks[0] = task;
   size_t numChildren = 1;
   
   /*! split until node is full */
   do {
     
     /*! find best child to split */
     float bestArea = inf; 
     ssize_t bestChild = -1;
     for (size_t i=0; i<numChildren; i++) 
     {
       float A = childTasks[i].sceneArea();
       size_t items = childTasks[i].items();
       if (items > 1 && A <= bestArea) { 
         bestChild = i; 
         bestArea = A; 
       }
     }
     if (bestChild == -1) break;
     
     /*! split best child into left and right child */
     __align(64) BuildRecord left, right;
     split(childTasks[bestChild],left,right,mode,threadID,numThreads);
     
     /* add new children left and right */
     left.depth = right.depth = task.depth+1;
     childTasks[bestChild] = childTasks[numChildren-1];
     childTasks[numChildren-1] = left;
     childTasks[numChildren+0] = right;
     numChildren++;
     
   } while (numChildren < 4);
   
   /* recurse */
   BVH4::Node* node = bvh->allocNode(threadID);
   for (ssize_t i=numChildren-1; i>=0; i--) {
     childTasks[i].parentNode = (size_t)&node->child(i);
     recurse(depth+1,childTasks[i],mode,threadID,numThreads);
     node->set(i,childTasks[i].bounds.geometry);
   }
   
   *(NodeRef*)task.parentNode = bvh->encodeNode(node);
 }
Exemple #4
0
/*
 * Run op->fnc[act] over addresses from 'start' up to 'end', walking down
 * the npg_op_t chain.
 *
 * At each level the handler is called every op->sz bytes while the cursor
 * is below __align(end, op->sz). If 'end' is not aligned for that level and
 * a next level exists (op->nxt), processing continues at the next level
 * from where the cursor stopped.
 *
 *  attr : forwarded unchanged to every op->fnc[act] call
 *  act  : index of the handler inside op->fnc[]
 */
static inline void __npg_fw_op(offset_t start, offset_t end, uint64_t attr,
                               npg_op_t *op, uint8_t act)
{
   offset_t cur = start;

   for(;;)
   {
      for( ; cur < __align(end, op->sz) ; cur += op->sz)
         op->fnc[act](cur, attr);

      /* stop when there is no further level or end is aligned here */
      if(!op->nxt || __aligned(end, op->sz))
         return;

      op = op->nxt;
   }
}
  /*! Per-thread pass that writes one PrimRef per triangle into this thread's
   *  slice [startID,endID) of 'prims' and merges the slice's bounds into
   *  global_bounds.
   *
   *  Only groups that are non-NULL, of type TRIANGLE_MESH, enabled and whose
   *  numTimeSteps differs from 1 are counted and processed. Each PrimRef gets
   *  the min/max of the triangle's three vertices, with the group index in
   *  lower.a and the triangle index in upper.a.
   *
   *  \param threadID    index of the calling thread, selects the slice
   *  \param numThreads  number of threads the primitives are divided among */
  void BVH4mbBuilder::computePrimRefsTrianglesMB(const size_t threadID, const size_t numThreads) 
  {
    DBG(PING);
    const size_t numGroups = scene->size();
    /* even partition of [0,numPrimitives) over the threads */
    const size_t startID = (threadID+0)*numPrimitives/numThreads;
    const size_t endID   = (threadID+1)*numPrimitives/numThreads;
    
    PrimRef *__restrict__ const prims     = this->prims;

    // === find first group containing startID ===
    // numSkipped counts the triangles of all qualifying groups before it
    unsigned int g=0, numSkipped = 0;
    for (; g<numGroups; g++) {       
      if (unlikely(scene->get(g) == NULL)) continue;
      if (unlikely(scene->get(g)->type != TRIANGLE_MESH)) continue;
      const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(g);
      if (unlikely(!mesh->isEnabled())) continue;
      if (unlikely(mesh->numTimeSteps == 1)) continue;

      const size_t numTriangles = mesh->numTriangles;
      if (numSkipped + numTriangles > startID) break;
      numSkipped += numTriangles;
    }

    // === start with first group containing startID ===
    // running bounds start out empty (min = +inf, max = -inf)
    mic_f bounds_scene_min((float)pos_inf);
    mic_f bounds_scene_max((float)neg_inf);
    mic_f bounds_centroid_min((float)pos_inf);
    mic_f bounds_centroid_max((float)neg_inf);

    unsigned int num = 0;
    unsigned int currentID = startID;
    /* index of the first triangle to process inside group g */
    unsigned int offset = startID - numSkipped;

    /* two-entry staging buffer: PrimRefs are written out either one at a
       time or as a pair (see the store logic in the loop below) */
    __align(64) PrimRef local_prims[2];
    size_t numLocalPrims = 0;
    PrimRef *__restrict__ dest = &prims[currentID];

    for (; g<numGroups; g++) 
    {
      /* same group filter as in the search loop above */
      if (unlikely(scene->get(g) == NULL)) continue;
      if (unlikely(scene->get(g)->type != TRIANGLE_MESH)) continue;
      const TriangleMeshScene::TriangleMesh* __restrict__ const mesh = scene->getTriangleMesh(g);
      if (unlikely(!mesh->isEnabled())) continue;
      if (unlikely(mesh->numTimeSteps == 1)) continue;

      for (unsigned int i=offset; i<mesh->numTriangles && currentID < endID; i++, currentID++)	 
      { 			    
	//DBG_PRINT(currentID);
	const TriangleMeshScene::TriangleMesh::Triangle& tri = mesh->triangle(i);
	prefetch<PFHINT_L2>(&tri + L2_PREFETCH_ITEMS);
	prefetch<PFHINT_L1>(&tri + L1_PREFETCH_ITEMS);

	/* NOTE(review): which time step mesh->vertex() returns is not visible
	   here -- confirm */
	const float *__restrict__ const vptr0 = (float*)&mesh->vertex(tri.v[0]);
	const float *__restrict__ const vptr1 = (float*)&mesh->vertex(tri.v[1]);
	const float *__restrict__ const vptr2 = (float*)&mesh->vertex(tri.v[2]);

	const mic_f v0 = broadcast4to16f(vptr0);
	const mic_f v1 = broadcast4to16f(vptr1);
	const mic_f v2 = broadcast4to16f(vptr2);

	/* triangle bounds, folded into the running scene bounds */
	const mic_f bmin = min(min(v0,v1),v2);
	const mic_f bmax = max(max(v0,v1),v2);
	bounds_scene_min = min(bounds_scene_min,bmin);
	bounds_scene_max = max(bounds_scene_max,bmax);
	/* bmin+bmax is twice the box center; no division is applied here */
	const mic_f centroid2 = bmin+bmax;
	bounds_centroid_min = min(bounds_centroid_min,centroid2);
	bounds_centroid_max = max(bounds_centroid_max,centroid2);

	/* the .a components are overwritten with group and triangle index
	   after the bounds have been stored */
	store4f(&local_prims[numLocalPrims].lower,bmin);
	store4f(&local_prims[numLocalPrims].upper,bmax);	
	local_prims[numLocalPrims].lower.a = g;
	local_prims[numLocalPrims].upper.a = i;

	//DBG_PRINT( local_prims[numLocalPrims] );

	numLocalPrims++;
	/* dest not on a 64-byte boundary and a single entry buffered: write it
	   alone and advance dest by one PrimRef. Presumably this brings dest to
	   a 64-byte boundary for the paired store below (assumes
	   sizeof(PrimRef) == 32 -- TODO confirm). */
	if (unlikely(((size_t)dest % 64) != 0) && numLocalPrims == 1)
	  {
	    *dest = local_prims[0];
	    dest++;
	    numLocalPrims--;
	  }
	else
	  {
	    /* the load is issued even when only one entry is buffered; the
	       value is only stored once both entries are filled */
	    const mic_f twoAABBs = load16f(local_prims);
	    if (numLocalPrims == 2)
	      {
		numLocalPrims = 0;
		store16f_ngo(dest,twoAABBs);
		dest+=2;
	      }
	  }	
      }
      if (currentID == endID) break;
      /* every following group is processed from its first triangle */
      offset = 0;
    }

    /* is there anything left in the local queue? */
    if (numLocalPrims % 2 != 0)
      *dest = local_prims[0];

    /* update global bounds */
    Centroid_Scene_AABB bounds;
    
    store4f(&bounds.centroid2.lower,bounds_centroid_min);
    store4f(&bounds.centroid2.upper,bounds_centroid_max);
    store4f(&bounds.geometry.lower,bounds_scene_min);
    store4f(&bounds.geometry.upper,bounds_scene_max);

    global_bounds.extend_atomic(bounds);    
  }
/* Decompiler-generated routine fn08048300.
 * The body makes three calls in sequence: __align(fp + 4),
 * __libc_start_main(...) with addresses taken from `globals`, dwArg00, edx
 * and two fp-relative values, and finally __hlt().
 * NOTE(review): a __libc_start_main call followed by a halt looks like a
 * program entry (_start) stub; which of the globals are main/init/fini is
 * not visible here -- confirm against the binary. */
void fn08048300(code * * edx, word32 dwArg00)
{
	__align(fp + 0x00000004);
	__libc_start_main(&globals->ptr8048410, dwArg00, fp + 0x00000004, &globals->ptr804829C, &globals->ptr8048690, edx, fp - 0x00000004);
	__hlt();
}
    /*! Occlusion query for a single ray against a BVH4i.
     *
     *  Traverses the tree with an explicit node stack. Every leaf reached is
     *  tested against four Triangle1 entries (tptr[0..3]); as soon as any of
     *  them passes all tests, ray.geomID is set to 0 and the function
     *  returns. If the stack runs empty first, the ray is left untouched.
     *
     *  \param bvh  tree to traverse (root, node array and triangle array are read)
     *  \param ray  query ray; only ray.geomID is written */
    void BVH4iIntersector1::occluded(BVH4i* bvh, Ray& ray)
    {
      /* near and node stack */
      __align(64) NodeRef stack_node[3*BVH4i::maxDepth+1];

      /* setup */
      const mic3f rdir16      = rcp_safe(mic3f(ray.dir.x,ray.dir.y,ray.dir.z));
      const mic_f inf         = mic_f(pos_inf);
      const mic_f zero        = mic_f::zero();

      const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
      const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr();

      /* slot 0 holds the invalidNode sentinel that ends the traversal,
         slot 1 the root; sindex is the number of used slots */
      stack_node[0] = BVH4i::invalidNode;
      stack_node[1] = bvh->root;
      size_t sindex = 2;

      /* ray data in the 16-wide layout used by the node and triangle tests */
      const mic_f org_xyz      = loadAOS4to16f(ray.org.x,ray.org.y,ray.org.z);
      const mic_f dir_xyz      = loadAOS4to16f(ray.dir.x,ray.dir.y,ray.dir.z);
      const mic_f rdir_xyz     = loadAOS4to16f(rdir16.x[0],rdir16.y[0],rdir16.z[0]);
      const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
      const mic_f min_dist_xyz = broadcast1to16f(&ray.tnear);
      const mic_f max_dist_xyz = broadcast1to16f(&ray.tfar);

      const unsigned int leaf_mask = BVH4I_LEAF_MASK;
	  
      while (1)
	{
	  /* pop the next node to process */
	  NodeRef curNode = stack_node[sindex-1];
	  sindex--;
            
	  /* descend through inner nodes until a leaf (or the sentinel) is reached */
	  while (1) 
	    {
	      /* test if this is a leaf node */
	      if (unlikely(curNode.isLeaf(leaf_mask))) break;
        
	      const Node* __restrict__ const node = curNode.node(nodes);
	      const float* __restrict const plower = (float*)node->lower;
	      const float* __restrict const pupper = (float*)node->upper;

	      prefetch<PFHINT_L1>((char*)node + 0);
	      prefetch<PFHINT_L1>((char*)node + 64);
        
	      /* intersect single ray with 4 bounding boxes */
	      /* mask 0x7777 has bits 0-2 of every group of four set */
	      const mic_f tLowerXYZ = load16f(plower) * rdir_xyz - org_rdir_xyz;
	      const mic_f tUpperXYZ = load16f(pupper) * rdir_xyz - org_rdir_xyz;
	      const mic_f tLower = mask_min(0x7777,min_dist_xyz,tLowerXYZ,tUpperXYZ);
	      const mic_f tUpper = mask_max(0x7777,max_dist_xyz,tLowerXYZ,tUpperXYZ);

	      /* early pop: take the next stack entry now so it can be
	         prefetched while the box test completes */
	      sindex--;
	      curNode = stack_node[sindex]; 

	      const Node* __restrict__ const next = curNode.node(nodes);
	      prefetch<PFHINT_L2>((char*)next + 0);
	      prefetch<PFHINT_L2>((char*)next + 64);

	      /* mask 0x8888 has bit 3 of every group of four set: one result
	         bit per child box */
	      const mic_f tNear = vreduce_max4(tLower);
	      const mic_f tFar  = vreduce_min4(tUpper);  
	      const mic_m hitm = le(0x8888,tNear,tFar);
	      const mic_f tNear_pos = select(hitm,tNear,inf);


	      /* if no child is hit, continue with early popped child */
	      if (unlikely(none(hitm))) continue;
	      /* at least one child is hit: undo the early pop */
	      sindex++;
        
	      const unsigned long hiti = toInt(hitm);
	      const unsigned long pos_first = bitscan64(hiti);
	      const unsigned long num_hitm = countbits(hiti); 
        
	      /* if a single child is hit, continue with that child */
	      /* child references are read out of the 'lower' array,
	         reinterpreted as unsigned int, at the index of the hit bit */
	      curNode = ((unsigned int *)plower)[pos_first];
	      if (likely(num_hitm == 1)) continue;
        
	      /* if two children are hit, push in correct order */
	      const unsigned long pos_second = bitscan64(pos_first,hiti);
	      if (likely(num_hitm == 2))
		{
		  /* NOTE(review): the distances are compared through their raw
		     32-bit patterns as unsigned integers, which matches float
		     ordering only for non-negative values -- confirm tNear
		     cannot be negative here. node_first is not used. */
		  const unsigned int dist_first  = ((unsigned int*)&tNear)[pos_first];
		  const unsigned int dist_second = ((unsigned int*)&tNear)[pos_second];
		  const unsigned int node_first  = curNode;
		  const unsigned int node_second = ((unsigned int*)plower)[pos_second];
          
		  if (dist_first <= dist_second)
		    {
		      /* first child is not farther: keep it, push the second */
		      stack_node[sindex] = node_second;
		      sindex++;
		      assert(sindex < 3*BVH4i::maxDepth+1);
		      continue;
		    }
		  else
		    {
		      /* second child is nearer: push the first, go on with the second */
		      stack_node[sindex] = curNode;
		      curNode = node_second;
		      sindex++;
		      assert(sindex < 3*BVH4i::maxDepth+1);
		      continue;
		    }
		}
        
	      /* continue with closest child and push all others */
	      const mic_f min_dist = set_min_lanes(tNear_pos);
	      const unsigned old_sindex = sindex;
	      /* reserve one stack slot per hit child except the closest one */
	      sindex += countbits(hiti) - 1;
	      assert(sindex < 3*BVH4i::maxDepth+1);
        
	      /* m_pos: hit children minus the lowest set bit of closest_child */
	      const mic_m closest_child = eq(hitm,min_dist,tNear);
	      const unsigned long closest_child_pos = bitscan64(closest_child);
	      const mic_m m_pos = andn(hitm,andn(closest_child,(mic_m)((unsigned int)closest_child - 1)));
	      const mic_i plower_node = load16i((int*)plower);
	      curNode = ((unsigned int*)plower)[closest_child_pos];
	      compactustore16i(m_pos,&stack_node[old_sindex],plower_node);
	    }
	  
	    

	  /* return if stack is empty */
	  if (unlikely(curNode == BVH4i::invalidNode)) break;


	  /* intersect one ray against four triangles */

	  //////////////////////////////////////////////////////////////////////////////////////////////////

	  const Triangle1* tptr  = (Triangle1*) curNode.leaf(accel);
	  prefetch<PFHINT_L1>(tptr + 3);
	  prefetch<PFHINT_L1>(tptr + 2);
	  prefetch<PFHINT_L1>(tptr + 1);
	  prefetch<PFHINT_L1>(tptr + 0); 

	  const mic_i and_mask = broadcast4to16i(zlc4);
	      
	  /* gather vertex 0/1/2 of the four triangles */
	  const mic_f v0 = gather_4f_zlc(and_mask,
					 (float*)&tptr[0].v0,
					 (float*)&tptr[1].v0,
					 (float*)&tptr[2].v0,
					 (float*)&tptr[3].v0);
	      
	  const mic_f v1 = gather_4f_zlc(and_mask,
					 (float*)&tptr[0].v1,
					 (float*)&tptr[1].v1,
					 (float*)&tptr[2].v1,
					 (float*)&tptr[3].v1);
	      
	  const mic_f v2 = gather_4f_zlc(and_mask,
					 (float*)&tptr[0].v2,
					 (float*)&tptr[1].v2,
					 (float*)&tptr[2].v2,
					 (float*)&tptr[3].v2);

	  /* edges, normal and the u/v terms, each scaled by 1/den */
	  const mic_f e1 = v1 - v0;
	  const mic_f e2 = v0 - v2;	     
	  const mic_f normal = lcross_zxy(e1,e2);
	  const mic_f org = v0 - org_xyz;
	  const mic_f odzxy = msubr231(org * swizzle(dir_xyz,_MM_SWIZ_REG_DACB), dir_xyz, swizzle(org,_MM_SWIZ_REG_DACB));
	  const mic_f den = ldot3_zxy(dir_xyz,normal);	      
	  const mic_f rcp_den = rcp(den);
	  const mic_f uu = ldot3_zxy(e2,odzxy); 
	  const mic_f vv = ldot3_zxy(e1,odzxy); 
	  const mic_f u = uu * rcp_den;
	  const mic_f v = vv * rcp_den;

	  /* mask 0x1111 has bit 0 of every group of four set: one bit per
	     triangle; with backface culling only lanes with den > 0 stay set */
#if defined(__BACKFACE_CULLING__)
	  const mic_m m_init = (mic_m)0x1111 & (den > zero);
#else
	  const mic_m m_init = 0x1111;
#endif
	  /* keep lanes with u >= 0, v >= 0 and u+v <= 1 */
	  const mic_m valid_u = ge(m_init,u,zero);
	  const mic_m valid_v = ge(valid_u,v,zero);
	  const mic_m m_aperture = le(valid_v,u+v,mic_f::one()); 

	  const mic_f nom = ldot3_zxy(org,normal);
	  const mic_f t = rcp_den*nom;

	  /* no triangle passed the u/v test: go on with the next stack entry */
	  if (unlikely(none(m_aperture))) continue;

	  /* keep lanes with tnear < t < tfar */
	  mic_m m_final  = lt(lt(m_aperture,min_dist_xyz,t),t,max_dist_xyz);

#if defined(__USE_RAY_MASK__)
	  /* drop triangles whose mask has no bit in common with ray.mask */
	  const mic_i rayMask(ray.mask);
	  const mic_i triMask = swDDDD(gather16i_4i_align(&tptr[0].v2,&tptr[1].v2,&tptr[2].v2,&tptr[3].v2));
	  const mic_m m_ray_mask = (rayMask & triMask) != mic_i::zero();
	  m_final &= m_ray_mask;	      
#endif

#if defined(__INTERSECTION_FILTER__) 
              
	  /* did the ray hit one of the four triangles? */
	  /* Candidates are visited in order of increasing t. The loop stops
	     with m_final still non-empty when the geometry has no occlusion
	     filter or the filter returns true; otherwise the candidate is
	     removed from m_final and the next one is examined. */
	  while (any(m_final)) 
	    {
	      const mic_f temp_t  = select(m_final,t,max_dist_xyz);
	      const mic_f min_dist = vreduce_min(temp_t);
	      const mic_m m_dist = eq(min_dist,temp_t);
	      const size_t vecIndex = bitscan(toInt(m_dist));
	      const size_t triIndex = vecIndex >> 2;
	      const Triangle1  *__restrict__ tri_ptr = tptr + triIndex;
	      /* isolate the lowest set bit of m_dist */
	      const mic_m m_tri = m_dist^(m_dist & (mic_m)((unsigned int)m_dist - 1));
	      const mic_f gnormalx = mic_f(tri_ptr->Ng.x);
	      const mic_f gnormaly = mic_f(tri_ptr->Ng.y);
	      const mic_f gnormalz = mic_f(tri_ptr->Ng.z);
	      const int geomID = tri_ptr->geomID();
	      const int primID = tri_ptr->primID();                
	      Geometry* geom = ((Scene*)bvh->geometry)->get(geomID);

	      if (likely(!geom->hasOcclusionFilter1())) break;
                
	      if (runOcclusionFilter1(geom,ray,u,v,min_dist,gnormalx,gnormaly,gnormalz,m_tri,geomID,primID)) 
		break;

	      m_final ^= m_tri; /* clear bit */
	    }
#endif

	  /* any surviving hit ends the query; geomID = 0 is the value written
	     to report it (presumably the "occluded" convention -- confirm) */
	  if (unlikely(any(m_final)))
	    {
	      ray.geomID = 0;
	      return;
	    }
	  //////////////////////////////////////////////////////////////////////////////////////////////////

	}
    }
    /*! Occlusion query for a packet of 16 rays against a BVH4mb, processing
     *  the active rays one at a time.
     *
     *  For every lane set in the valid mask the tree is traversed with an
     *  explicit node stack. Node bounds and triangle vertices are blended
     *  between their two stored versions with weights (1 - time) and time.
     *  A ray whose traversal finds a hit gets its bit set in 'terminated';
     *  at the end geomID is set to 0 for lanes that are both valid and
     *  terminated.
     *
     *  \param valid_i  per-lane activity flags; a lane is active when non-zero
     *  \param bvh      tree to traverse (root, node array and triangle array are read)
     *  \param ray16    ray packet; only ray16.geomID is written */
    void BVH4mbIntersector16Single::occluded(mic_i* valid_i, BVH4mb* bvh, Ray16& ray16)
    {
      /* near and node stack */
      __align(64) NodeRef stack_node[3*BVH4i::maxDepth+1];

      /* setup */
      const mic_m m_valid     = *(mic_i*)valid_i != mic_i(0);
      const mic3f rdir16      = rcp_safe(ray16.dir);
      /* inactive lanes start out as terminated */
      unsigned int terminated = toInt(!m_valid);
      const mic_f inf         = mic_f(pos_inf);
      const mic_f zero        = mic_f::zero();

      const Node               * __restrict__ nodes = (Node     *)bvh->nodePtr();
      const BVH4mb::Triangle01 * __restrict__ accel = (BVH4mb::Triangle01 *)bvh->triPtr();

      /* slot 0 holds the invalidNode sentinel that ends a traversal; it is
         written once and shared by all rays */
      stack_node[0] = BVH4i::invalidNode;

      /* iterate over the set bits of the valid mask */
      long rayIndex = -1;
      while((rayIndex = bitscan64(rayIndex,toInt(m_valid))) != BITSCAN_NO_BIT_SET_64)	    
        {
	  /* restart the stack for this ray: sentinel in slot 0, root in slot 1 */
	  stack_node[1] = bvh->root;
	  size_t sindex = 2;

	  /* data of ray 'rayIndex' in the 16-wide layout used by the tests */
	  const mic_f org_xyz      = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z);
	  const mic_f dir_xyz      = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z);
	  const mic_f rdir_xyz     = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z);
	  const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
	  const mic_f min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]);
	  const mic_f max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]);
	  const mic_f time         = broadcast1to16f(&ray16.time[rayIndex]);

	  const unsigned int leaf_mask = BVH4I_LEAF_MASK;

	  while (1)
	    {
	      /* pop the next node to process */
	      NodeRef curNode = stack_node[sindex-1];
	      sindex--;

	      /* weight of the first stored version in the blends below */
	      const mic_f one_time = (mic_f::one() - time);
            
	      /* descend through inner nodes until a leaf (or the sentinel) is reached */
	      while (1) 
		{
		  /* test if this is a leaf node */
		  if (unlikely(curNode.isLeaf(leaf_mask))) break;
        
		  const Node* __restrict__ const node = curNode.node(nodes);
		  const float* __restrict const plower = (float*)node->lower;
		  const float* __restrict const pupper = (float*)node->upper;

		  prefetch<PFHINT_L1>((char*)node + 0*64);
		  prefetch<PFHINT_L1>((char*)node + 1*64);
		  prefetch<PFHINT_L1>((char*)node + 2*64);
		  prefetch<PFHINT_L1>((char*)node + 3*64);

		  /* blend the two stored bounds: (1-time)*lower + time*lower_t1,
		     and likewise for upper */
		  const BVH4mb::Node* __restrict__ const nodeMB = (BVH4mb::Node*)node;
		  const mic_f lower = one_time  * load16f((float*)nodeMB->lower) + time * load16f((float*)nodeMB->lower_t1);
		  const mic_f upper = one_time  * load16f((float*)nodeMB->upper) + time * load16f((float*)nodeMB->upper_t1);
        
		  /* intersect single ray with 4 bounding boxes */
		  const mic_f tLowerXYZ = lower * rdir_xyz - org_rdir_xyz;
		  const mic_f tUpperXYZ = upper * rdir_xyz - org_rdir_xyz;

		  /* mask 0x7777 has bits 0-2 of every group of four set */
		  const mic_f tLower = mask_min(0x7777,min_dist_xyz,tLowerXYZ,tUpperXYZ);
		  const mic_f tUpper = mask_max(0x7777,max_dist_xyz,tLowerXYZ,tUpperXYZ);

		  /* NOTE(review): curNode has not yet been replaced by the
		     popped stack entry at this point (that happens further
		     down), so 'next' resolves the node currently being
		     processed rather than the next stack entry -- confirm
		     intent */
		  const Node* __restrict__ const next = curNode.node(nodes);
		  prefetch<PFHINT_L2>((char*)next + 0);
		  prefetch<PFHINT_L2>((char*)next + 64);

		  sindex--;
		  /* mask 0x8888 has bit 3 of every group of four set: one
		     result bit per child box */
		  const mic_f tNear = vreduce_max4(tLower);
		  const mic_f tFar  = vreduce_min4(tUpper);  
		  const mic_m hitm = le(0x8888,tNear,tFar);
		  const mic_f tNear_pos = select(hitm,tNear,inf);

		  curNode = stack_node[sindex]; // early pop of next node

		  /* if no child is hit, continue with early popped child */
		  if (unlikely(none(hitm))) continue;
		  /* at least one child is hit: undo the early pop */
		  sindex++;
        
		  const unsigned long hiti = toInt(hitm);
		  const unsigned long pos_first = bitscan64(hiti);
		  const unsigned long num_hitm = countbits(hiti); 
        
		  /* if a single child is hit, continue with that child */
		  /* child references are read out of the 'lower' array,
		     reinterpreted as unsigned int, at the index of the hit bit */
		  curNode = ((unsigned int *)plower)[pos_first];
		  if (likely(num_hitm == 1)) continue;
        
		  /* if two children are hit, push in correct order */
		  const unsigned long pos_second = bitscan64(pos_first,hiti);
		  if (likely(num_hitm == 2))
		    {
		      /* NOTE(review): the distances are compared through their
		         raw 32-bit patterns as unsigned integers, which matches
		         float ordering only for non-negative values -- confirm
		         tNear cannot be negative here. node_first is not used. */
		      const unsigned int dist_first  = ((unsigned int*)&tNear)[pos_first];
		      const unsigned int dist_second = ((unsigned int*)&tNear)[pos_second];
		      const unsigned int node_first  = curNode;
		      const unsigned int node_second = ((unsigned int*)plower)[pos_second];
          
		      if (dist_first <= dist_second)
			{
			  /* first child is not farther: keep it, push the second */
			  stack_node[sindex] = node_second;
			  sindex++;
			  assert(sindex < 3*BVH4i::maxDepth+1);
			  continue;
			}
		      else
			{
			  /* second child is nearer: push the first, go on with the second */
			  stack_node[sindex] = curNode;
			  curNode = node_second;
			  sindex++;
			  assert(sindex < 3*BVH4i::maxDepth+1);
			  continue;
			}
		    }
        
		  /* continue with closest child and push all others */
		  const mic_f min_dist = set_min_lanes(tNear_pos);
		  const unsigned int old_sindex = sindex;
		  /* reserve one stack slot per hit child except the closest one */
		  sindex += countbits(hiti) - 1;
		  assert(sindex < 3*BVH4i::maxDepth+1);
        
		  /* m_pos: hit children minus the lowest set bit of closest_child */
		  const mic_m closest_child = eq(hitm,min_dist,tNear);
		  const unsigned long closest_child_pos = bitscan64(closest_child);
		  const mic_m m_pos = andn(hitm,andn(closest_child,(mic_m)((unsigned int)closest_child - 1)));
		  const mic_i plower_node = load16i((int*)plower);
		  curNode = ((unsigned int*)plower)[closest_child_pos];
		  compactustore16i(m_pos,&stack_node[old_sindex],plower_node);
		}
	  
	    

	      /* return if stack is empty */
	      /* (this leaves the traversal of the current ray only) */
	      if (unlikely(curNode == BVH4i::invalidNode)) break;


	      /* intersect one ray against four triangles */

	      //////////////////////////////////////////////////////////////////////////////////////////////////

	      const BVH4mb::Triangle01* tptr  = (BVH4mb::Triangle01*) curNode.leaf(accel);

	      prefetch<PFHINT_L1>((mic_f*)tptr +  0); 
	      prefetch<PFHINT_L1>((mic_f*)tptr +  1); 
	      prefetch<PFHINT_L1>((mic_f*)tptr +  2); 
	      prefetch<PFHINT_L1>((mic_f*)tptr +  3); 

	      const mic_i and_mask = broadcast4to16i(zlc4);
	      
	      /* gather vertex 0/1/2 of the four triangles from their t0 part */
	      const mic_f v0_t0 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t0.v0,
						(float*)&tptr[1].t0.v0,
						(float*)&tptr[2].t0.v0,
						(float*)&tptr[3].t0.v0);
	      
	      const mic_f v1_t0 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t0.v1,
						(float*)&tptr[1].t0.v1,
						(float*)&tptr[2].t0.v1,
						(float*)&tptr[3].t0.v1);
	      
	      const mic_f v2_t0 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t0.v2,
						(float*)&tptr[1].t0.v2,
						(float*)&tptr[2].t0.v2,
						(float*)&tptr[3].t0.v2);


	      prefetch<PFHINT_L2>((mic_f*)tptr +  4); 
	      prefetch<PFHINT_L2>((mic_f*)tptr +  5); 
	      prefetch<PFHINT_L2>((mic_f*)tptr +  6); 
	      prefetch<PFHINT_L2>((mic_f*)tptr +  7); 

	      /* the same three vertices from the t1 part */
	      const mic_f v0_t1 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t1.v0,
						(float*)&tptr[1].t1.v0,
						(float*)&tptr[2].t1.v0,
						(float*)&tptr[3].t1.v0);
	      
	      const mic_f v1_t1 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t1.v1,
						(float*)&tptr[1].t1.v1,
						(float*)&tptr[2].t1.v1,
						(float*)&tptr[3].t1.v1);
	      
	      const mic_f v2_t1 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t1.v2,
						(float*)&tptr[1].t1.v2,
						(float*)&tptr[2].t1.v2,
						(float*)&tptr[3].t1.v2);

	      /* blend the vertices: (1-time)*t0 + time*t1 */
	      const mic_f v0 = v0_t0 * one_time + time * v0_t1;
	      const mic_f v1 = v1_t0 * one_time + time * v1_t1;
	      const mic_f v2 = v2_t0 * one_time + time * v2_t1;

	      /* edges, normal and the u/v terms, each scaled by 1/den */
	      const mic_f e1 = v1 - v0;
	      const mic_f e2 = v0 - v2;	     
	      const mic_f normal = lcross_zxy(e1,e2);
	      const mic_f org = v0 - org_xyz;
	      const mic_f odzxy = msubr231(org * swizzle(dir_xyz,_MM_SWIZ_REG_DACB), dir_xyz, swizzle(org,_MM_SWIZ_REG_DACB));
	      const mic_f den = ldot3_zxy(dir_xyz,normal);	      
	      const mic_f rcp_den = rcp(den);
	      const mic_f uu = ldot3_zxy(e2,odzxy); 
	      const mic_f vv = ldot3_zxy(e1,odzxy); 
	      const mic_f u = uu * rcp_den;
	      const mic_f v = vv * rcp_den;

	      /* mask 0x1111 has bit 0 of every group of four set: one bit per
	         triangle; with backface culling only lanes with den > 0 stay set */
#if defined(__BACKFACE_CULLING__)
	      const mic_m m_init = (mic_m)0x1111 & (den > zero);
#else
	      const mic_m m_init = 0x1111;
#endif

	      /* keep lanes with u >= 0, v >= 0 and u+v <= 1 */
	      const mic_m valid_u = ge((mic_m)m_init,u,zero);
	      const mic_m valid_v = ge(valid_u,v,zero);
	      const mic_m m_aperture = le(valid_v,u+v,mic_f::one()); 

	      const mic_f nom = ldot3_zxy(org,normal);
	      const mic_f t = rcp_den*nom;
	      /* no triangle passed the u/v test: go on with the next stack entry */
	      if (unlikely(none(m_aperture))) continue;

	      /* keep lanes with tnear < t < tfar */
	      mic_m m_final  = lt(lt(m_aperture,min_dist_xyz,t),t,max_dist_xyz);

#if defined(__USE_RAY_MASK__)
	      /* drop triangles whose mask has no bit in common with the ray's mask */
	      const mic_i rayMask(ray16.mask[rayIndex]);
	      const mic_i triMask = swDDDD(gather16i_4i_align(&tptr[0].t0.v2,&tptr[1].t0.v2,&tptr[2].t0.v2,&tptr[3].t0.v2));
	      const mic_m m_ray_mask = (rayMask & triMask) != mic_i::zero();
	      m_final &= m_ray_mask;	      
#endif

	      /* any surviving hit marks this ray as terminated and ends its traversal */
	      if (unlikely(any(m_final)))
		{
		  terminated |= mic_m::shift1[rayIndex];
		  break;
		}
	      //////////////////////////////////////////////////////////////////////////////////////////////////

	    }


	  /* stop early once every lane is marked terminated */
	  if (unlikely(all(toMask(terminated)))) break;
	}


      /* geomID = 0 is the value written for valid, terminated lanes
         (presumably the "occluded" convention -- confirm) */
      store16i(m_valid & toMask(terminated),&ray16.geomID,0);
    }
/* Decompiler-generated routine fn08048278.
 * The body makes three calls in sequence: __align(fp + 4),
 * __libc_start_main(...) with addresses taken from `globals`, dwArg00, edx
 * and two fp-relative values, and finally __hlt().
 * NOTE(review): a __libc_start_main call followed by a halt looks like a
 * program entry (_start) stub; which of the globals are main/init/fini is
 * not visible here -- confirm against the binary. */
void fn08048278(code * * edx, word32 dwArg00)
{
	__align(fp + 0x00000004);
	__libc_start_main(&globals->ptr804835D, dwArg00, fp + 0x00000004, &globals->ptr8048390, &globals->ptr80483C0, edx, fp - 0x00000004);
	__hlt();
}