Пример #1
0
  __m256 BVH2Intersector8Chunk<TriangleIntersector>::occluded(const BVH2Intersector8Chunk* This, Ray8& ray, const __m256 valid_i)
  {
    avxb valid = valid_i;
    avxb terminated = !valid;
    const BVH2* bvh = This->bvh;
    STAT3(shadow.travs,1,popcnt(valid),8);

    NodeRef stack[1+BVH2::maxDepth]; //!< stack of nodes that still need to get traversed
    NodeRef* stackPtr = stack;                    //!< current stack pointer
    NodeRef cur = bvh->root;                      //!< in cur we track the ID of the current node

    /* let inactive rays miss all boxes */
    const avx3f rdir = rcp_safe(ray.dir);
    avxf rayFar  = select(terminated,avxf(neg_inf),ray.tfar);

    while (true)
    {
      /*! downtraversal loop */
      while (likely(cur.isNode()))
      {
        STAT3(normal.trav_nodes,1,popcnt(valid),8);

        /* intersect packet with box of both children */
        const Node* node = cur.node();
        const size_t hit0 = intersectBox(ray.org,rdir,ray.tnear,rayFar,node,0);
        const size_t hit1 = intersectBox(ray.org,rdir,ray.tnear,rayFar,node,1);
        
        /*! if two children are hit push both onto stack */
        if (likely(hit0 != 0 && hit1 != 0)) {
          *stackPtr = node->child(0); stackPtr++; cur = node->child(1);
        }
        
        /*! if one child hit, continue with that child */
        else {
          if      (likely(hit0 != 0)) cur = node->child(0);
          else if (likely(hit1 != 0)) cur = node->child(1);
          else goto pop_node;
        }
      }

      /*! leaf node, intersect all triangles */
      {
        STAT3(shadow.trav_leaves,1,popcnt(valid),8);
        size_t num; Triangle* tri = (Triangle*) cur.leaf(NULL,num);
        for (size_t i=0; i<num; i++) {
          terminated |= TriangleIntersector::occluded(valid,ray,tri[i],bvh->vertices);
          if (all(terminated)) return terminated;
        }

        /* let terminated rays miss all boxes */
        rayFar = select(terminated,avxf(neg_inf),rayFar);
      }

      /*! pop next node from stack */
pop_node:
      if (unlikely(stackPtr == stack)) break;
      cur = *(--stackPtr);
    }
    return terminated;
  }
Пример #2
0
inline uint32_t select64(uint64_t v, size_t r)
{
	assert(r <= 64);
	if (r > popcnt(v)) return 64;
	uint32_t pos = 0;
	uint32_t c = popcnt(uint32_t(v));
	if (r > c) {
		r -= c;
		pos = 32;
		v >>= 32;
	}
Пример #3
0
int
reset_for_buflen (unsigned int thread, int new_buflen)
{
	

  // make sure new size is power of 2
  if (popcnt (new_buflen) != 1)
    return -1;
  reset_buflen = TRUE;
  uni[thread].buflen = new_buflen;
  top[thread].jack.reset_size = new_buflen;
  safefree ((char *) top[thread].hold.buf.r);
  safefree ((char *) top[thread].hold.buf.l);
  safefree ((char *) top[thread].hold.aux.r);
  safefree ((char *) top[thread].hold.aux.l);
  

  destroy_workspace (thread);
  reset_buflen = FALSE;
  loc[thread].def.size = new_buflen;
  setup_workspace (loc[thread].def.rate,
		   loc[thread].def.size,
		   loc[thread].def.mode,
		   loc[thread].path.wisdom, loc[thread].def.spec, loc[thread].def.nrx, loc[thread].def.size, thread);

  setup_local_audio (thread);

  reset_meters (thread);
  reset_spectrum (thread);
  reset_counters (thread);

  return 0;
}
Пример #4
0
/*
 * The score is always computed as it was WHITE to play.
 * The same applies to all the heuristic functions.
 *
 * Notes:
 * 	- The quiescence search guarantees that there are no captures pending for the side playing.
 * 	- Contrary to other engines, we don't give a bonus to the side playing because he already has an advantage in mobility:
 * 		the opponent may have pending captures, and these are not giving him any bonuses.
 */
int evaluate(Position * const position) {
	int score;
	unsigned int game_phase;

	uint32 white = position->white();
	uint32 black = position->black();
	uint32 kings = position->kings();
	player_t to_play = position->to_play();

	uint32 w_men = white & ~kings;
	uint32 w_kings = white & kings;
	uint32 b_men = black & ~kings;
	uint32 b_kings = black & kings;
	uint32 occupied = white | black;
	uint32 empty = ~occupied;

	int n_w_men = popcnt(w_men);
	int n_w_kings = popcnt(w_kings);
	int n_b_men = popcnt(b_men);
	int n_b_kings = popcnt(b_kings);

	int n_pieces = n_w_men + n_w_kings + n_b_men + n_b_kings;

	game_phase = (n_pieces >= MIDDLE_GAME ? (n_pieces >= OPENING ? 0 : 1) : 2);

	// material
	score = (n_w_men - n_b_men) * MAN_VALUE[game_phase] + (n_w_kings - n_b_kings) * KING_VALUE[game_phase];

	position->set_to_play(WHITE);
	score += w_mobility(w_men, w_kings, black, b_kings, empty, game_phase);
	position->set_to_play(BLACK);
	score -= b_mobility(b_men, b_kings, white, w_kings, empty, game_phase);
	position->set_to_play(to_play);

	score += back_rank(white, black, w_kings, b_kings, game_phase);
	score += triangle(w_men, b_men, w_kings, b_kings, game_phase);

	score += balance(white, black, game_phase);

	score += runaway(w_men, b_men, empty, game_phase);
	score += bridge_exploit(white, black, w_kings, b_kings, game_phase);

	score += diagonal_control(w_kings, b_kings, occupied, game_phase);

	return (to_play == WHITE ? score : -score);
}
Пример #5
0
static int eval_black_king(int sq)
{
  register int pawn_first;
  int score = 0;
  
  safety[B] = &empty[0];
  
  if(!(board->castle & (BLACK_OO|BLACK_OOO)))
  { if(sq > F8 && sq <= H8)
    { pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_F]));
      score += (king_shield[B][pawn_first]) * 3;
      pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_G]));
      score += (king_shield[B][pawn_first]) * 2;
      pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_H]));
      score += (king_shield[B][pawn_first]);
    
      score -= popcnt(board->bb_pawns[W] & king_storm_mask[B]) * 4;
      
      safety[B] = &safety_kingside[B][0];
    }	
    else if(sq < D8 && sq >= A8)
    { pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_C]));
      score += (king_shield[B][pawn_first]) * 3;
      pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_B]));
      score += (king_shield[B][pawn_first]) * 2;
      pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_A]));
      score += (king_shield[B][pawn_first]);
  
      score -= popcnt(board->bb_pawns[W] & queen_storm_mask[B]) * 4;

      safety[B] = &safety_queenside[B][0];
    }
    else score -= KING_UNCASTLED_STILL;
  }
  else 
  { score -= KING_UNCASTLED_YET;
    if(PieceType(sq - 16) != BP
    &&(PieceType(sq - 15) != BP
    || PieceType(sq - 17) != BP))
      score -= KING_EXPOSED;
  }
  return score;
}
Пример #6
0
static int eval_white_king(int sq)
{
  register int pawn_first;
  int score = 0;

  safety[W] = &empty[0];
  
  if(!(board->castle & (WHITE_OO|WHITE_OOO)))
  { if(sq > F1 && sq <= H1)
    { pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_F]));
      score += (king_shield[W][pawn_first]) * 3;
      pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_G]));
      score += (king_shield[W][pawn_first]) * 2;
      pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_H]));
      score += (king_shield[W][pawn_first]);
    
      score -= popcnt(board->bb_pawns[B] & king_storm_mask[W]) * 4;
      
      safety[W] = &safety_kingside[W][0];
    }	
    else if(sq < D1 && sq >= A1)
    { pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_C]));
      score += (king_shield[W][pawn_first]) * 3;
      pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_B]));
      score += (king_shield[W][pawn_first]) * 2;
      pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_A]));
      score += (king_shield[W][pawn_first]);
    
      score -= popcnt(board->bb_pawns[B] & queen_storm_mask[W]) * 4;
      
      safety[W] = &safety_queenside[W][0];
    }
    else score -= KING_UNCASTLED_STILL;
  }
  else
  { score -= KING_UNCASTLED_YET;
    if(PieceType(sq + 16) != WP
    &&(PieceType(sq + 15) != WP
    || PieceType(sq + 17) != WP))
      score -= KING_EXPOSED;
  }
  return score;
}
Пример #7
0
	size_t hasMoreThen(size_t items){
		size_t so_far = 0;
		for (size_t i = 0; i < words; i++){
			if(data[i]){
				so_far += popcnt(data[i]);
				if(so_far > items)
					return true;
			}
		}
		return false;
	}
Пример #8
0
			uint64_t select1(uint64_t rank) const
			{
				uint64_t pos = 0;

				uint64_t crank;		
				uint64_t i = 0;
				while ( rank >= (crank=popcnt(A[i])) )
				{
					rank -= crank;
					pos += 64;
					i++;
				}
				
				uint64_t word = A[i];
				
				if ( rank >= (crank=popcnt(word&0x00000000FFFFFFFFULL)) )
				{
					rank -= crank;
					pos += 32;
					word >>= 32;
				}
Пример #9
0
long int fmap_find(const uint8_t *image, unsigned int image_len)
{
	long int ret = -1;

	if ((image == NULL) || (image_len == 0))
		return -1;

	if (popcnt(image_len) == 1)
		ret = fmap_bsearch(image, image_len);
	else
		ret = fmap_lsearch(image, image_len);

	return ret;
}
Пример #10
0
void testmap(void)
{
	pages = calloc(1, PAGES * sizeof(struct page));
	if (!pages)
		exit(100);

	printf("simple tests\n");
#define MB ((1024*1024)/pagesize)
	setpol(0, PAGES, MPOL_INTERLEAVE, 3);
	setpol(0, MB, MPOL_BIND, 1);
	setpol(MB, MB, MPOL_BIND, 1);
	setpol(MB, MB, MPOL_DEFAULT, 0);
	setpol(MB, MB, MPOL_PREFERRED, 2);
	setpol(MB/2, MB, MPOL_DEFAULT, 0);
	setpol(MB+MB/2, MB, MPOL_BIND, 2);
	setpol(MB/2+100, 100, MPOL_PREFERRED, 1);
	setpol(100, 200, MPOL_PREFERRED, 1);
	printf("done\n");

	for (;;) {
		unsigned long offset = random() % PAGES;
		int policy = random() % (MPOL_MAX);
		unsigned long nodes = random() % 4;
		long length = random() % (PAGES - offset);

		/* validate */
		switch (policy) {
		case MPOL_DEFAULT:
			nodes = 0;
			break;
		case MPOL_INTERLEAVE:
		case MPOL_BIND:
			if (nodes == 0)
				continue;
			break;
		case MPOL_PREFERRED:
			if (popcnt(nodes) != 1)
				continue;
			break;
		}

		setpol(offset, length, policy, nodes);

	}
}
Пример #11
0
uint64 gen_multiplier(int sq, bitboard_t mask, int bits, int *mbits, int *ibits, int rook)
{
  int fail,b;
  int trials;
  int bit_trials;
  uint64 magic;
  uint64 index;
  bitboard_t x, blockers[4096], solution[4096], used[4096];
	
  for(x = 0ULL; x < (1ULL << bits); x++)
  { blockers[x] = get_blockers(x, mask);
    solution[x] = (rook) ? (get_rook_atk(sq,blockers[x])) : 
      (get_bishop_atk(sq, blockers[x]));
  }
	
  trials = 0; bit_trials = 1;
  for(;;)
  { ibits[sq] = 0;
    magic = rand64_1bits(bit_trials);
    for(x = 0ULL; x < (1ULL << bits); x++) used[x] = 0;
    fail = 0;
    for(x = 0ULL; x < (1ULL << bits); x++)
    { index = (blockers[x] * magic) >> (64 - bits);
      if(used[index] == 0) 
      { used[index] = solution[x];
        b = popcnt(blockers[x] * magic);
        if(ibits[sq] < b) ibits[sq] = b;
      }
      else if(used[index] != solution[x])
      { fail = 1;
        break;
      }
    }
    if(!fail)
    { mbits[sq] = bit_trials;
      return magic;
    }
    trials++;
    if((trials > MAX_TRIALS) && (bit_trials < bits))
    { bit_trials++;
      trials = 0;
    }
  }
}
Пример #12
0
int Cdu::count_points_bitmaps
(int nwords, unsigned *bmps, const vector<Window> & ws) {
	if(coords.size() == 1) {
		dimpair_t dp = coords[0];
		const Window &w = ws[dp.win];
		// 1-d, npoints = window.width * window.max
		npoints = w.width * w.max;
	} else {
		// multi-d, do real point counting
		// new variable for points, to satisfy OpenMP
		int local_npoints = 0;
		#pragma omp parallel for reduction(+:local_npoints)
		for(int iword = 0; iword < nwords; iword++) {
			unsigned word = ~0u;
			for(int icoord = 0; icoord < coords.size(); icoord++)
				word &= BMPS(coords[icoord].win, iword);
			local_npoints += popcnt(word);
		}  // for(iword)
		npoints = local_npoints;
	}
  return npoints;
}  // count_points
Пример #13
0
    void BVH4Intersector4Chunk<types,robust,PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray)
    {
      /* load ray */
      const sseb valid0 = *valid_i;
      const sse3f rdir = rcp_safe(ray.dir);
      const sse3f org(ray.org), org_rdir = org * rdir;
      ssef ray_tnear = select(valid0,ray.tnear,ssef(pos_inf));
      ssef ray_tfar  = select(valid0,ray.tfar ,ssef(neg_inf));
      const ssef inf = ssef(pos_inf);
      Precalculations pre(valid0,ray);
      
      /* allocate stack and push root node */
      ssef    stack_near[stackSize];
      NodeRef stack_node[stackSize];
      stack_node[0] = BVH4::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* stackEnd = stack_node+stackSize;
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      ssef*    __restrict__ sptr_near = stack_near + 2;
      
      while (1)
      {
        /* pop next node from stack */
        assert(sptr_node > stack_node);
        sptr_node--;
        sptr_near--;
        NodeRef cur = *sptr_node;
        if (unlikely(cur == BVH4::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }
        
        /* cull node if behind closest hit point */
        ssef curDist = *sptr_near;
        if (unlikely(none(ray_tfar > curDist))) 
          continue;
        
        while (1)
        {
	  /* process normal nodes */
          if (likely((types & 0x1) && cur.isNode()))
          {
	    const sseb valid_node = ray_tfar > curDist;
	    STAT3(normal.trav_nodes,1,popcnt(valid_node),8);
	    const Node* __restrict__ const node = cur.node();
	    
	    /* pop of next node */
	    assert(sptr_node > stack_node);
	    sptr_node--;
	    sptr_near--;
	    cur = *sptr_node; 
	    curDist = *sptr_near;
          
#pragma unroll(4)
	    for (unsigned i=0; i<BVH4::N; i++)
	    {
	      const NodeRef child = node->children[i];
	      if (unlikely(child == BVH4::emptyNode)) break;
	      ssef lnearP; const sseb lhit = node->intersect<robust>(i,org,rdir,org_rdir,ray_tnear,ray_tfar,lnearP);
	      	      
	      /* if we hit the child we choose to continue with that child if it 
		 is closer than the current next child, or we push it onto the stack */
	      if (likely(any(lhit)))
	      {
		assert(sptr_node < stackEnd);
		const ssef childDist = select(lhit,lnearP,inf);
		const NodeRef child = node->children[i];
		assert(child != BVH4::emptyNode);
		sptr_node++;
		sptr_near++;
		
		/* push cur node onto stack and continue with hit child */
		if (any(childDist < curDist))
		{
		  *(sptr_node-1) = cur;
		  *(sptr_near-1) = curDist; 
		  curDist = childDist;
		  cur = child;
		}
		
		/* push hit child onto stack */
		else {
		  *(sptr_node-1) = child;
		  *(sptr_near-1) = childDist; 
		}
	      }	      
	    }
	  }
	  /* process motion blur nodes */
          else if (likely((types & 0x10) && cur.isNodeMB()))
	  {
	    const sseb valid_node = ray_tfar > curDist;
	    STAT3(normal.trav_nodes,1,popcnt(valid_node),8);
	    const BVH4::NodeMB* __restrict__ const node = cur.nodeMB();
          
	    /* pop of next node */
	    assert(sptr_node > stack_node);
	    sptr_node--;
	    sptr_near--;
	    cur = *sptr_node; 
	    curDist = *sptr_near;
	    
#pragma unroll(4)
	    for (unsigned i=0; i<BVH4::N; i++)
	    {
	      const NodeRef child = node->child(i);
	      if (unlikely(child == BVH4::emptyNode)) break;
	      ssef lnearP; const sseb lhit = node->intersect(i,org,rdir,org_rdir,ray_tnear,ray_tfar,ray.time,lnearP);
	      
	      /* if we hit the child we choose to continue with that child if it 
		 is closer than the current next child, or we push it onto the stack */
	      if (likely(any(lhit)))
	      {
		assert(sptr_node < stackEnd);
		assert(child != BVH4::emptyNode);
		const ssef childDist = select(lhit,lnearP,inf);
		sptr_node++;
		sptr_near++;
		
		/* push cur node onto stack and continue with hit child */
		if (any(childDist < curDist))
		{
		  *(sptr_node-1) = cur;
		  *(sptr_near-1) = curDist; 
		  curDist = childDist;
		  cur = child;
		}
		
		/* push hit child onto stack */
		else {
		  *(sptr_node-1) = child;
		  *(sptr_near-1) = childDist; 
		}
	      }	      
	    }
	  }
	  else 
	    break;
        }
Пример #14
0
    void BVH4iIntersector4Chunk<TriangleIntersector4>::occluded(sseb* valid_i, BVH4i* bvh, Ray4& ray)
    {
      /* load node and primitive array */
      const Node      * __restrict__ nodes  = (Node    *)bvh->nodePtr();
      const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr();
      
      /* load ray */
      const sseb valid = *valid_i;
      sseb terminated = !valid;
      const sse3f rdir = rcp_safe(ray.dir);
      const sse3f org_rdir = ray.org * rdir;
      ssef ray_tnear = select(valid,ray.tnear,pos_inf);
      ssef ray_tfar  = select(valid,ray.tfar ,neg_inf);
      const ssef inf = ssef(pos_inf);
      
      /* allocate stack and push root node */
      ssef    stack_near[3*BVH4i::maxDepth+1];
      NodeRef stack_node[3*BVH4i::maxDepth+1];
      stack_node[0] = BVH4i::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      ssef*    __restrict__ sptr_near = stack_near + 2;
      
      while (1)
      {
        /* pop next node from stack */
        sptr_node--;
        sptr_near--;
        NodeRef curNode = *sptr_node;
        if (unlikely(curNode == BVH4i::invalidNode)) 
          break;
        
        /* cull node if behind closest hit point */
        ssef curDist = *sptr_near;
        if (unlikely(none(ray_tfar > curDist))) 
          continue;
        
        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(curNode.isLeaf()))
            break;
          
          const sseb valid_node = ray_tfar > curDist;
          STAT3(shadow.trav_nodes,1,popcnt(valid_node),4);
          const Node* __restrict__ const node = curNode.node(nodes);
          
          /* pop of next node */
          sptr_node--;
          sptr_near--;
          curNode = *sptr_node; // FIXME: this trick creates issues with stack depth
          curDist = *sptr_near;
          
#pragma unroll(4)
          for (unsigned i=0; i<4; i++)
          {
            const NodeRef child = node->children[i];
            if (unlikely(child == BVH4i::emptyNode)) break;
            
#if defined(__AVX2__)
            const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
            const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
            const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
            const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
            const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
            const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
            const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
            const ssef lfarP  = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
            const sseb lhit   = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);      
#else
            const ssef lclipMinX = node->lower_x[i] * rdir.x - org_rdir.x;
            const ssef lclipMinY = node->lower_y[i] * rdir.y - org_rdir.y;
            const ssef lclipMinZ = node->lower_z[i] * rdir.z - org_rdir.z;
            const ssef lclipMaxX = node->upper_x[i] * rdir.x - org_rdir.x;
            const ssef lclipMaxY = node->upper_y[i] * rdir.y - org_rdir.y;
            const ssef lclipMaxZ = node->upper_z[i] * rdir.z - org_rdir.z;
            const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const ssef lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const sseb lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);      
#endif
            
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              const ssef childDist = select(lhit,lnearP,inf);
              sptr_node++;
              sptr_near++;
              
              /* push cur node onto stack and continue with hit child */
              if (any(childDist < curDist))
              {
                *(sptr_node-1) = curNode;
                *(sptr_near-1) = curDist; 
                curDist = childDist;
                curNode = child;
              }
              
              /* push hit child onto stack*/
              else {
                *(sptr_node-1) = child;
                *(sptr_near-1) = childDist; 
              }
              assert(sptr_node - stack_node < BVH4i::maxDepth);
            }	      
          }
        }
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4i::invalidNode)) 
          break;
        
        /* intersect leaf */
        const sseb valid_leaf = ray_tfar > curDist;
        STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),4);
        size_t items; const Triangle* tri  = (Triangle*) curNode.leaf(accel, items);
        terminated |= TriangleIntersector4::occluded(!terminated,ray,tri,items,bvh->geometry);
        if (all(terminated)) break;
        ray_tfar = select(terminated,neg_inf,ray_tfar);
      }
      store4i(valid & terminated,&ray.geomID,0);
      AVX_ZERO_UPPER();
    }
Пример #15
0
    void BVH8Intersector16Chunk<PrimitiveIntersector16>::occluded(bool16* valid_i, BVH8* bvh, Ray16& ray)
    {
#if defined(__AVX512__)
      
      /* load ray */
      const bool16 valid = *valid_i;
      bool16 terminated = !valid;
      const Vec3f16 rdir = rcp_safe(ray.dir);
      const Vec3f16 org_rdir = ray.org * rdir;
      float16 ray_tnear = select(valid,ray.tnear,pos_inf);
      float16 ray_tfar  = select(valid,ray.tfar ,neg_inf);
      const float16 inf = float16(pos_inf);
      Precalculations pre(valid,ray);

      /* allocate stack and push root node */
      float16    stack_near[3*BVH8::maxDepth+1];
      NodeRef stack_node[3*BVH8::maxDepth+1];
      stack_node[0] = BVH8::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      float16*    __restrict__ sptr_near = stack_near + 2;
      
      while (1)
      {
        /* pop next node from stack */
        sptr_node--;
        sptr_near--;
        NodeRef cur = *sptr_node;
        if (unlikely(cur == BVH8::invalidNode)) 
          break;
        
        /* cull node if behind closest hit point */
        float16 curDist = *sptr_near;
        if (unlikely(none(ray_tfar > curDist))) 
          continue;
        
        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(cur.isLeaf()))
            break;
          
          const bool16 valid_node = ray_tfar > curDist;
          STAT3(shadow.trav_nodes,1,popcnt(valid_node),8);
          const Node* __restrict__ const node = (Node*)cur.node();
          
          /* pop of next node */
          sptr_node--;
          sptr_near--;
          cur = *sptr_node; // FIXME: this trick creates issues with stack depth
          curDist = *sptr_near;
          
          for (unsigned i=0; i<BVH8::N; i++)
          {
            const NodeRef child = node->children[i];
            if (unlikely(child == BVH8::emptyNode)) break;
            
            const float16 lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
            const float16 lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
            const float16 lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
            const float16 lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
            const float16 lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
            const float16 lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
            const float16 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const float16 lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const bool16 lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);      
            
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              const float16 childDist = select(lhit,lnearP,inf);
              sptr_node++;
              sptr_near++;
              
              /* push cur node onto stack and continue with hit child */
              if (any(childDist < curDist))
              {
                *(sptr_node-1) = cur;
                *(sptr_near-1) = curDist; 
                curDist = childDist;
                cur = child;
              }
              
              /* push hit child onto stack*/
              else {
                *(sptr_node-1) = child;
                *(sptr_near-1) = childDist; 
              }
              assert(sptr_node - stack_node < BVH8::maxDepth);
            }	      
          }
        }
        
        /* return if stack is empty */
        if (unlikely(cur == BVH8::invalidNode)) 
          break;
        
        /* intersect leaf */
	assert(cur != BVH8::emptyNode);
        const bool16 valid_leaf = ray_tfar > curDist;
        STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8);
        size_t items; const Triangle* tri  = (Triangle*) cur.leaf(items);
        terminated |= PrimitiveIntersector16::occluded(!terminated,pre,ray,tri,items,bvh->scene);
        if (all(terminated)) break;
        ray_tfar = select(terminated,neg_inf,ray_tfar);
      }
      store16i(valid & terminated,&ray.geomID,0);
      AVX_ZERO_UPPER();
#endif      
    }
Пример #16
0
  void BVH2Intersector8Chunk<TriangleIntersector>::intersect(const BVH2Intersector8Chunk* This, Ray8& ray, const __m256 valid_i)
  {
    avxb valid = valid_i;
    const BVH2* bvh = This->bvh;
    STAT3(normal.travs,1,popcnt(valid),8);

    struct StackItem {
      NodeRef ptr;
      avxf dist;
    };
    
    StackItem stack[1+BVH2::maxDepth];   //!< stack of nodes that still need to get traversed
    StackItem* stackPtr = stack;                       //!< current stack pointer
    NodeRef cur = bvh->root;                        //!< in cur we track the ID of the current node

    /* let inactive rays miss all boxes */
    const avx3f rdir = rcp_safe(ray.dir);
    ray.tfar = select(valid,ray.tfar,avxf(neg_inf));

    while (true)
    {
      /*! downtraversal loop */
      while (likely(cur.isNode()))
      {
        STAT3(normal.trav_nodes,1,popcnt(valid),8);

        /* intersect packet with box of both children */
        const Node* node = cur.node();
        avxf dist0; size_t hit0 = intersectBox(ray.org,rdir,ray.tnear,ray.tfar,node,0,dist0);
        avxf dist1; size_t hit1 = intersectBox(ray.org,rdir,ray.tnear,ray.tfar,node,1,dist1);

        /*! if two children hit, push far node onto stack and continue with closer node */
        if (likely(hit0 != 0 && hit1 != 0)) {
          if (any(valid & (dist0 < dist1))) { 
            stackPtr->ptr = node->child(1); stackPtr->dist = dist1; stackPtr++; cur = node->child(0); 
          } else { 
            stackPtr->ptr = node->child(0); stackPtr->dist = dist0; stackPtr++; cur = node->child(1); 
          }
        }

        /*! if one child hit, continue with that child */
        else {
          if      (likely(hit0 != 0)) cur = node->child(0);
          else if (likely(hit1 != 0)) cur = node->child(1);
          else goto pop_node;
        }
      }

      /*! leaf node, intersect all triangles */
      {
        STAT3(normal.trav_leaves,1,popcnt(valid),8);
        size_t num; Triangle* tri = (Triangle*) cur.leaf(NULL,num);
        for (size_t i=0; i<num; i++) {
          TriangleIntersector::intersect(valid,ray,tri[i],bvh->vertices);
        }
      }

      /*! pop next node from stack */
pop_node:
      if (unlikely(stackPtr == stack)) break;
      --stackPtr;
      cur = stackPtr->ptr;
      if (unlikely(all(stackPtr->dist > ray.tfar))) goto pop_node;
    }
  }
    void BVH4Intersector8Chunk<types, robust, PrimitiveIntersector8>::intersect(bool8* valid_i, BVH4* bvh, Ray8& ray)
    {
      /* verify correct input */
      bool8 valid0 = *valid_i;
#if defined(RTCORE_IGNORE_INVALID_RAYS)
      valid0 &= ray.valid();
#endif
      assert(all(valid0,ray.tnear > -FLT_MIN));
      assert(!(types & BVH4::FLAG_NODE_MB) || all(valid0,ray.time >= 0.0f & ray.time <= 1.0f));
      /* load ray */
      const Vec3f8 rdir = rcp_safe(ray.dir);
      const Vec3f8 org(ray.org), org_rdir = org * rdir;
      float8 ray_tnear = select(valid0,ray.tnear,pos_inf);
      float8 ray_tfar  = select(valid0,ray.tfar ,neg_inf);
      const float8 inf = float8(pos_inf);
      Precalculations pre(valid0,ray);

      /* allocate stack and push root node */
      float8    stack_near[stackSize];
      NodeRef stack_node[stackSize];
      stack_node[0] = BVH4::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* stackEnd = stack_node+stackSize;
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      float8*    __restrict__ sptr_near = stack_near + 2;
      
      while (1)
      {
        /* pop next node from stack */
        assert(sptr_node > stack_node);
        sptr_node--;
        sptr_near--;
        NodeRef cur = *sptr_node;
        if (unlikely(cur == BVH4::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }
        
        /* cull node if behind closest hit point */
        float8 curDist = *sptr_near;
        if (unlikely(none(ray_tfar > curDist))) 
          continue;
        
        while (1)
        {
          /* process normal nodes */
          if (likely((types & 0x1) && cur.isNode()))
          {
	    const bool8 valid_node = ray_tfar > curDist;
	    STAT3(normal.trav_nodes,1,popcnt(valid_node),8);
	    const Node* __restrict__ const node = cur.node();
	    
	    /* pop of next node */
	    assert(sptr_node > stack_node);
	    sptr_node--;
	    sptr_near--;
	    cur = *sptr_node; 
	    curDist = *sptr_near;
	    
#pragma unroll(4)
	    for (unsigned i=0; i<BVH4::N; i++)
	    {
	      const NodeRef child = node->children[i];
	      if (unlikely(child == BVH4::emptyNode)) break;
	      float8 lnearP; const bool8 lhit = intersect8_node<robust>(node,i,org,rdir,org_rdir,ray_tnear,ray_tfar,lnearP);
	      
	      /* if we hit the child we choose to continue with that child if it 
		 is closer than the current next child, or we push it onto the stack */
	      if (likely(any(lhit)))
	      {
		assert(sptr_node < stackEnd);
		assert(child != BVH4::emptyNode);
		const float8 childDist = select(lhit,lnearP,inf);
		sptr_node++;
		sptr_near++;
		
		/* push cur node onto stack and continue with hit child */
		if (any(childDist < curDist))
		{
		  *(sptr_node-1) = cur;
		  *(sptr_near-1) = curDist; 
		  curDist = childDist;
		  cur = child;
		}
		
		/* push hit child onto stack */
		else {
		  *(sptr_node-1) = child;
		  *(sptr_near-1) = childDist; 
		}
	      }     
	    }
	  }
	  
	  /* process motion blur nodes */
          else if (likely((types & 0x10) && cur.isNodeMB()))
	  {
	    const bool8 valid_node = ray_tfar > curDist;
	    STAT3(normal.trav_nodes,1,popcnt(valid_node),8);
	    const BVH4::NodeMB* __restrict__ const node = cur.nodeMB();
          
	    /* pop of next node */
	    assert(sptr_node > stack_node);
	    sptr_node--;
	    sptr_near--;
	    cur = *sptr_node; 
	    curDist = *sptr_near;
	    
#pragma unroll(4)
	    for (unsigned i=0; i<BVH4::N; i++)
	    {
	      const NodeRef child = node->child(i);
	      if (unlikely(child == BVH4::emptyNode)) break;
	      float8 lnearP; const bool8 lhit = intersect_node(node,i,org,rdir,org_rdir,ray_tnear,ray_tfar,ray.time,lnearP);
	      	      
	      /* if we hit the child we choose to continue with that child if it 
		 is closer than the current next child, or we push it onto the stack */
	      if (likely(any(lhit)))
	      {
		assert(sptr_node < stackEnd);
		assert(child != BVH4::emptyNode);
		const float8 childDist = select(lhit,lnearP,inf);
		sptr_node++;
		sptr_near++;
		
		/* push cur node onto stack and continue with hit child */
		if (any(childDist < curDist))
		{
		  *(sptr_node-1) = cur;
		  *(sptr_near-1) = curDist; 
		  curDist = childDist;
		  cur = child;
		}
		
		/* push hit child onto stack */
		else {
		  *(sptr_node-1) = child;
		  *(sptr_near-1) = childDist; 
		}
	      }	      
	    }
	  }
	  else 
	    break;
	}
Пример #18
0
    void BVH8iIntersector8Hybrid<TriangleIntersector8>::occluded(avxb* valid_i, BVH8i* bvh, Ray8& ray)
    {
      /* load ray */
      const avxb valid = *valid_i;
      avxb terminated = !valid;
      avx3f ray_org = ray.org, ray_dir = ray.dir;
      avxf ray_tnear = ray.tnear, ray_tfar  = ray.tfar;
#if defined(__FIX_RAYS__)
      const avxf float_range = 0.1f*FLT_MAX;
      ray_org = clamp(ray_org,avx3f(-float_range),avx3f(+float_range));
      ray_dir = clamp(ray_dir,avx3f(-float_range),avx3f(+float_range));
      ray_tnear = max(ray_tnear,FLT_MIN); 
      ray_tfar  = min(ray_tfar,float(inf)); 
#endif
      const avx3f rdir = rcp_safe(ray_dir);
      const avx3f org(ray_org), org_rdir = org * rdir;
      ray_tnear = select(valid,ray_tnear,avxf(pos_inf));
      ray_tfar  = select(valid,ray_tfar ,avxf(neg_inf));
      const avxf inf = avxf(pos_inf);
      
      /* compute near/far per ray */
      avx3i nearXYZ;
      nearXYZ.x = select(rdir.x >= 0.0f,avxi(0*(int)sizeof(avxf)),avxi(1*(int)sizeof(avxf)));
      nearXYZ.y = select(rdir.y >= 0.0f,avxi(2*(int)sizeof(avxf)),avxi(3*(int)sizeof(avxf)));
      nearXYZ.z = select(rdir.z >= 0.0f,avxi(4*(int)sizeof(avxf)),avxi(5*(int)sizeof(avxf)));

      /* allocate stack and push root node */
      avxf    stack_near[stackSizeChunk];
      NodeRef stack_node[stackSizeChunk];
      stack_node[0] = BVH4i::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* stackEnd = stack_node+stackSizeChunk;
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      avxf*    __restrict__ sptr_near = stack_near + 2;

      const Node     * __restrict__ nodes = (Node    *)bvh->nodePtr();
      const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr();
      
      while (1)
      {
        /* pop next node from stack */
        assert(sptr_node > stack_node);
        sptr_node--;
        sptr_near--;
        NodeRef curNode = *sptr_node;
        if (unlikely(curNode == BVH4i::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }

        /* cull node if behind closest hit point */
        avxf curDist = *sptr_near;
        const avxb active = curDist < ray_tfar;
        if (unlikely(none(active))) 
          continue;
        
        /* switch to single ray traversal */
#if !defined(__WIN32__) || defined(__X86_64__)
        size_t bits = movemask(active);
        if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) {
          for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) {
            if (occluded1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar,nearXYZ))
              terminated[i] = -1;
          }
          if (all(terminated)) break;
          ray_tfar = select(terminated,avxf(neg_inf),ray_tfar);
          continue;
        }
#endif
                
        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(curNode.isLeaf()))
            break;
          
          const avxb valid_node = ray_tfar > curDist;
          STAT3(shadow.trav_nodes,1,popcnt(valid_node),8);
          const Node* __restrict__ const node = (Node*)curNode.node(nodes);
          
          /* pop of next node */
          assert(sptr_node > stack_node);
          sptr_node--;
          sptr_near--;
          curNode = *sptr_node;
          curDist = *sptr_near;
          
          for (unsigned i=0; i<8; i++)
          {
            const NodeRef child = node->children[i];
            if (unlikely(child == BVH4i::emptyNode)) break;
            
#if defined(__AVX2__)
            const avxf lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
            const avxf lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
            const avxf lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
            const avxf lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
            const avxf lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
            const avxf lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
            const avxf lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
            const avxf lfarP  = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
            const avxb lhit   = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);      
#else
            const avxf lclipMinX = (node->lower_x[i] - org.x) * rdir.x;
            const avxf lclipMinY = (node->lower_y[i] - org.y) * rdir.y;
            const avxf lclipMinZ = (node->lower_z[i] - org.z) * rdir.z;
            const avxf lclipMaxX = (node->upper_x[i] - org.x) * rdir.x;
            const avxf lclipMaxY = (node->upper_y[i] - org.y) * rdir.y;
            const avxf lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z;
            const avxf lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const avxf lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const avxb lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);      
#endif
            
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              assert(sptr_node < stackEnd);
              assert(child != BVH4i::emptyNode);
              const avxf childDist = select(lhit,lnearP,inf);
              sptr_node++;
              sptr_near++;
              
              /* push cur node onto stack and continue with hit child */
              if (any(childDist < curDist))
              {
                *(sptr_node-1) = curNode;
                *(sptr_near-1) = curDist; 
                curDist = childDist;
                curNode = child;
              }
              
              /* push hit child onto stack */
              else {
                *(sptr_node-1) = child;
                *(sptr_near-1) = childDist; 
              }
            }	      
          }
        }
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4i::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }
        
        /* intersect leaf */
        const avxb valid_leaf = ray_tfar > curDist;
        STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8);
        size_t items; const Triangle* prim = (Triangle*) curNode.leaf(accel,items);
        terminated |= TriangleIntersector8::occluded(!terminated,ray,prim,items,bvh->geometry);
        if (all(terminated)) break;
        ray_tfar = select(terminated,avxf(neg_inf),ray_tfar);
      }
      store8i(valid & terminated,&ray.geomID,0);
      AVX_ZERO_UPPER();
    }
    void BVH4Intersector4Hybrid<PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray)
    {
      /* load ray */
      const sseb valid0 = *valid_i;
      sse3f ray_org = ray.org, ray_dir = ray.dir;
      ssef ray_tnear = ray.tnear, ray_tfar  = ray.tfar;
#if defined(__FIX_RAYS__)
      const ssef float_range = 0.1f*FLT_MAX;
      ray_org = clamp(ray_org,sse3f(-float_range),sse3f(+float_range));
      ray_dir = clamp(ray_dir,sse3f(-float_range),sse3f(+float_range));
      ray_tnear = max(ray_tnear,FLT_MIN); 
      ray_tfar  = min(ray_tfar,float(inf)); 
#endif
      const sse3f rdir = rcp_safe(ray_dir);
      const sse3f org(ray_org), org_rdir = org * rdir;
      ray_tnear = select(valid0,ray_tnear,ssef(pos_inf));
      ray_tfar  = select(valid0,ray_tfar ,ssef(neg_inf));
      const ssef inf = ssef(pos_inf);
      
      /* allocate stack and push root node */
      ssef    stack_near[stackSizeChunk]; 
      NodeRef stack_node[stackSizeChunk];
      stack_node[0] = BVH4::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* stackEnd = stack_node+stackSizeChunk;
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      ssef*    __restrict__ sptr_near = stack_near + 2;
      
      while (1)
      {
        /* pop next node from stack */
        assert(sptr_node > stack_node);
        sptr_node--;
        sptr_near--;
        NodeRef curNode = *sptr_node;
        if (unlikely(curNode == BVH4::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }
        
        /* cull node if behind closest hit point */
        ssef curDist = *sptr_near;
        const sseb active = curDist < ray_tfar;
        if (unlikely(none(active))) 
          continue;
        
        /* switch to single ray traversal */
#if !defined(__WIN32__) || defined(__X86_64__)
        size_t bits = movemask(active);
        if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) {
          for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) {
            intersect1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar);
          }
          ray_tfar = ray.tfar;
          continue;
        }
#endif
        
        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(curNode.isLeaf()))
            break;
          
          const sseb valid_node = ray_tfar > curDist;
          STAT3(normal.trav_nodes,1,popcnt(valid_node),4);
          const Node* __restrict__ const node = curNode.node();
          
          /* pop of next node */
          assert(sptr_node > stack_node);
          sptr_node--;
          sptr_near--;
          curNode = *sptr_node; 
          curDist = *sptr_near;
          
#pragma unroll(4)
          for (unsigned i=0; i<4; i++)
          {
            const NodeRef child = node->children[i];
            if (unlikely(child == BVH4::emptyNode)) break;
            
#if defined(__AVX2__)
            const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
            const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
            const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
            const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
            const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
            const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
#else
            const ssef lclipMinX = (node->lower_x[i] - org.x) * rdir.x;
            const ssef lclipMinY = (node->lower_y[i] - org.y) * rdir.y;
            const ssef lclipMinZ = (node->lower_z[i] - org.z) * rdir.z;
            const ssef lclipMaxX = (node->upper_x[i] - org.x) * rdir.x;
            const ssef lclipMaxY = (node->upper_y[i] - org.y) * rdir.y;
            const ssef lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z;
#endif
    
#if defined(__SSE4_1__)
            const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
            const ssef lfarP  = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
            const sseb lhit   = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);      
#else
            const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const ssef lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const sseb lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);      
#endif
        
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              assert(sptr_node < stackEnd);
              const ssef childDist = select(lhit,lnearP,inf);
              const NodeRef child = node->children[i];
              assert(child != BVH4::emptyNode);
              sptr_node++;
              sptr_near++;

              /* push cur node onto stack and continue with hit child */
              if (any(childDist < curDist))
              {
                *(sptr_node-1) = curNode;
                *(sptr_near-1) = curDist; 
                curDist = childDist;
                curNode = child;
              }
              
              /* push hit child onto stack */
              else {
                *(sptr_node-1) = child;
                *(sptr_near-1) = childDist; 
              }
            }	      
          }
        }
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }
        
        /* intersect leaf */
        const sseb valid_leaf = ray_tfar > curDist;
        STAT3(normal.trav_leaves,1,popcnt(valid_leaf),4);
        size_t items; const Primitive* prim = (Primitive*) curNode.leaf(items);
        PrimitiveIntersector4::intersect(valid_leaf,ray,prim,items,bvh->geometry);
        ray_tfar = select(valid_leaf,ray.tfar,ray_tfar);
      }
      AVX_ZERO_UPPER();
    }
Пример #20
0
    void BVH4Intersector4Chunk<PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray)
    {
      /* load ray */
      const sseb valid0 = *valid_i;
      const sse3f rdir = rcp_safe(ray.dir);
      const sse3f org(ray.org), org_rdir = org * rdir;
      ssef ray_tnear = select(valid0,ray.tnear,ssef(pos_inf));
      ssef ray_tfar  = select(valid0,ray.tfar ,ssef(neg_inf));
      const ssef inf = ssef(pos_inf);
      Precalculations pre(valid0,ray);
      
      /* allocate stack and push root node */
      ssef    stack_near[stackSize];
      NodeRef stack_node[stackSize];
      stack_node[0] = BVH4::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* stackEnd = stack_node+stackSize;
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      ssef*    __restrict__ sptr_near = stack_near + 2;
      
      while (1)
      {
        /* pop next node from stack */
        assert(sptr_node > stack_node);
        sptr_node--;
        sptr_near--;
        NodeRef curNode = *sptr_node;
        if (unlikely(curNode == BVH4::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }
        
        /* cull node if behind closest hit point */
        ssef curDist = *sptr_near;
        if (unlikely(none(ray_tfar > curDist))) 
          continue;
        
        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(curNode.isLeaf()))
            break;
          
          const sseb valid_node = ray_tfar > curDist;
          STAT3(normal.trav_nodes,1,popcnt(valid_node),4);
          const Node* __restrict__ const node = curNode.node();
          
          /* pop of next node */
          assert(sptr_node > stack_node);
          sptr_node--;
          sptr_near--;
          curNode = *sptr_node;
          curDist = *sptr_near;
          
#pragma unroll(4)
          for (unsigned i=0; i<BVH4::N; i++)
          {
            const NodeRef child = node->children[i];
            if (unlikely(child == BVH4::emptyNode)) break;
            
#if defined(__AVX2__)
            const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
            const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
            const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
            const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
            const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
            const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
#else
            const ssef lclipMinX = (node->lower_x[i] - org.x) * rdir.x;
            const ssef lclipMinY = (node->lower_y[i] - org.y) * rdir.y;
            const ssef lclipMinZ = (node->lower_z[i] - org.z) * rdir.z;
            const ssef lclipMaxX = (node->upper_x[i] - org.x) * rdir.x;
            const ssef lclipMaxY = (node->upper_y[i] - org.y) * rdir.y;
            const ssef lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z;
#endif

#if defined(__SSE4_1__)
            const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
            const ssef lfarP  = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
            const sseb lhit   = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);      
#else
            const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const ssef lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const sseb lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);      
#endif
            
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              assert(sptr_node < stackEnd);
              const ssef childDist = select(lhit,lnearP,inf);
              const NodeRef child = node->children[i];
              assert(child != BVH4::emptyNode);
              sptr_node++;
              sptr_near++;
              
              /* push cur node onto stack and continue with hit child */
              if (any(childDist < curDist))
              {
                *(sptr_node-1) = curNode;
                *(sptr_near-1) = curDist; 
                curDist = childDist;
                curNode = child;
              }
              
              /* push hit child onto stack */
              else {
                *(sptr_node-1) = child;
                *(sptr_near-1) = childDist; 
              }
            }	      
          }
        }
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }
        
        /* intersect leaf */
        const sseb valid_leaf = ray_tfar > curDist;
        STAT3(normal.trav_leaves,1,popcnt(valid_leaf),4);
        size_t items; const Primitive* prim = (Primitive*) curNode.leaf(items);
        PrimitiveIntersector4::intersect(valid_leaf,pre,ray,prim,items,bvh->geometry);
        ray_tfar = select(valid_leaf,ray.tfar,ray_tfar);
      }
      AVX_ZERO_UPPER();
    }
    void BVH4Intersector4Hybrid<types,robust,PrimitiveIntersector4>::intersect(bool4* valid_i, BVH4* bvh, Ray4& ray)
    {
      /* verify correct input */
      bool4 valid0 = *valid_i;
#if defined(RTCORE_IGNORE_INVALID_RAYS)
      valid0 &= ray.valid();
#endif
      assert(all(valid0,ray.tnear > -FLT_MIN));
      assert(!(types & BVH4::FLAG_NODE_MB) || all(valid0,ray.time >= 0.0f & ray.time <= 1.0f));

      /* load ray */
      Vec3f4 ray_org = ray.org;
      Vec3f4 ray_dir = ray.dir;
      float4 ray_tnear = ray.tnear, ray_tfar  = ray.tfar;
      const Vec3f4 rdir = rcp_safe(ray_dir);
      const Vec3f4 org(ray_org), org_rdir = org * rdir;
      ray_tnear = select(valid0,ray_tnear,float4(pos_inf));
      ray_tfar  = select(valid0,ray_tfar ,float4(neg_inf));
      const float4 inf = float4(pos_inf);
      Precalculations pre(valid0,ray);

      /* compute near/far per ray */
      Vec3i4 nearXYZ;
      nearXYZ.x = select(rdir.x >= 0.0f,int4(0*(int)sizeof(float4)),int4(1*(int)sizeof(float4)));
      nearXYZ.y = select(rdir.y >= 0.0f,int4(2*(int)sizeof(float4)),int4(3*(int)sizeof(float4)));
      nearXYZ.z = select(rdir.z >= 0.0f,int4(4*(int)sizeof(float4)),int4(5*(int)sizeof(float4)));

      /* allocate stack and push root node */
      float4    stack_near[stackSizeChunk];
      NodeRef stack_node[stackSizeChunk];
      stack_node[0] = BVH4::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* stackEnd = stack_node+stackSizeChunk;
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      float4*    __restrict__ sptr_near = stack_near + 2;
      
      while (1) pop:
      {
        /* pop next node from stack */
        assert(sptr_node > stack_node);
        sptr_node--;
        sptr_near--;
        NodeRef cur = *sptr_node;
        if (unlikely(cur == BVH4::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }
        
        /* cull node if behind closest hit point */
        float4 curDist = *sptr_near;
        const bool4 active = curDist < ray_tfar;
        if (unlikely(none(active)))
          continue;
        
        /* switch to single ray traversal */
#if !defined(__WIN32__) || defined(__X86_64__)
        size_t bits = movemask(active);
        if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) {
          for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) {
            BVH4Intersector4Single<types,robust,PrimitiveIntersector4>::intersect1(bvh, cur, i, pre, ray, ray_org, ray_dir, rdir, ray_tnear, ray_tfar, nearXYZ);
          }
          ray_tfar = min(ray_tfar,ray.tfar);
          continue;
        }
#endif

        while (1)
        {
	  /* process normal nodes */
          if (likely((types & 0x1) && cur.isNode()))
          {
	    const bool4 valid_node = ray_tfar > curDist;
	    STAT3(normal.trav_nodes,1,popcnt(valid_node),4);
	    const Node* __restrict__ const node = cur.node();
	    
	    /* pop of next node */
	    assert(sptr_node > stack_node);
	    sptr_node--;
	    sptr_near--;
	    cur = *sptr_node; 
	    curDist = *sptr_near;
	    
#pragma unroll(4)
	    for (unsigned i=0; i<BVH4::N; i++)
	    {
	      const NodeRef child = node->children[i];
	      if (unlikely(child == BVH4::emptyNode)) break;
	      float4 lnearP; const bool4 lhit = intersect_node<robust>(node,i,org,rdir,org_rdir,ray_tnear,ray_tfar,lnearP);
	      
	      /* if we hit the child we choose to continue with that child if it 
		 is closer than the current next child, or we push it onto the stack */
	      if (likely(any(lhit)))
	      {
		assert(sptr_node < stackEnd);
		assert(child != BVH4::emptyNode);
		const float4 childDist = select(lhit,lnearP,inf);
		sptr_node++;
		sptr_near++;
		
		/* push cur node onto stack and continue with hit child */
		if (any(childDist < curDist))
		{
		  *(sptr_node-1) = cur;
		  *(sptr_near-1) = curDist; 
		  curDist = childDist;
		  cur = child;
		}
		
		/* push hit child onto stack */
		else {
		  *(sptr_node-1) = child;
		  *(sptr_near-1) = childDist; 
		}
	      }     
	    }
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
          // seems to be the best place for testing utilization
          if (unlikely(popcnt(ray_tfar > curDist) <= SWITCH_THRESHOLD))
            {
              *sptr_node++ = cur;
              *sptr_near++ = curDist;
              goto pop;
            }
#endif
	  }
	  
	  /* process motion blur nodes */
          else if (likely((types & 0x10) && cur.isNodeMB()))
	  {
	    const bool4 valid_node = ray_tfar > curDist;
	    STAT3(normal.trav_nodes,1,popcnt(valid_node),4);
	    const BVH4::NodeMB* __restrict__ const node = cur.nodeMB();
          
	    /* pop of next node */
	    assert(sptr_node > stack_node);
	    sptr_node--;
	    sptr_near--;
	    cur = *sptr_node; 
	    curDist = *sptr_near;
	    
#pragma unroll(4)
	    for (unsigned i=0; i<BVH4::N; i++)
	    {
	      const NodeRef child = node->child(i);
	      if (unlikely(child == BVH4::emptyNode)) break;
	      float4 lnearP; const bool4 lhit = intersect_node(node,i,org,rdir,org_rdir,ray_tnear,ray_tfar,ray.time,lnearP);
	      
	      /* if we hit the child we choose to continue with that child if it 
		 is closer than the current next child, or we push it onto the stack */
	      if (likely(any(lhit)))
	      {
		assert(sptr_node < stackEnd);
		assert(child != BVH4::emptyNode);
		const float4 childDist = select(lhit,lnearP,inf);
		sptr_node++;
		sptr_near++;
		
		/* push cur node onto stack and continue with hit child */
		if (any(childDist < curDist))
		{
		  *(sptr_node-1) = cur;
		  *(sptr_near-1) = curDist; 
		  curDist = childDist;
		  cur = child;
		}
		
		/* push hit child onto stack */
		else {
		  *(sptr_node-1) = child;
		  *(sptr_near-1) = childDist; 
		}
	      }	      
	    }
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
          // seems to be the best place for testing utilization
          if (unlikely(popcnt(ray_tfar > curDist) <= SWITCH_THRESHOLD))
            {
              *sptr_node++ = cur;
              *sptr_near++ = curDist;
              goto pop;
            }
#endif
	  }
	  else 
	    break;
	}
Пример #22
0
pawn_entry_t *eval_pawn_struct()
{
  int sq, atk_sq, file, rank;
  bitboard_t t, sq_mask, front, wpawns, bpawns;
  bool passed, backward, doubled, isolated, connected;
  pawn_entry_t *p = pnt + (pos->phash & PMASK);
  
  if(p->pkey == pos->phash) return (p);
  
  p->scoremg = 0;
  p->scoreeg = 0;
  p->passers = 0ULL;
  p->pkey = pos->phash;
  
  wpawns = pos->occ[WP];
  bpawns = pos->occ[BP];
  
  //storing pawn attacks:
  p->attacks[W]  = ((pos->occ[WP] & ~FMASK_A) << 7);
  p->attacks[W] |= ((pos->occ[WP] & ~FMASK_H) << 9);
  p->attacks[B]  = ((pos->occ[BP] & ~FMASK_A) >> 9);
  p->attacks[B] |= ((pos->occ[BP] & ~FMASK_H) >> 7);

  t = pos->occ[WP];
  while(t)
  {
    sq = bitscanf(t);
    bitclear(t, sq);
    file = File(sq);
    rank = Rank(sq);
  
    //psq_values:
    p->scoremg += psq_pawn_mg[W][sq];
    
    //collecting the pawn type info:
    doubled   = (true && (wpawns & rook_backward[B][sq]));
    isolated  = (true && (!(wpawns & isolated_mask[file])));
    connected = (true && (wpawns & connected_mask[W][sq]));
    passed    = (!doubled && (!(bpawns & passer_mask[W][sq])));
    backward  = (!isolated && !connected && !passed && (!(wpawns & backward_mask[W][sq])));
  
    //if pawn can advance (not blocked by any other pawn), 
    //but cannot advance safely, it's backward:
    if(backward)
    { front = rook_backward[B][sq];
      backward = false;
      while(front)
      { atk_sq = bitscanf(front);
        sq_mask = (1ULL << atk_sq);
        if((sq_mask & (wpawns | bpawns)) 
        || (sq_mask & p->attacks[W]))
          break;
        if(sq_mask & p->attacks[B])
        { backward = true;
          break;
        }
        bitclear(front, atk_sq);
      }
    }
    //penalties and bonuses:
    if(backward)
    { p->scoremg -= backward_penalty_mg[file];
      p->scoreeg -= backward_penalty_eg[file];
      if(!(bpawns & rook_backward[B][sq]))
        p->scoremg -= backward_on_semi_op_mg;
       
      if(((1ULL << (sq+8)) & p->attacks[B])
      && ((1ULL << (sq+8)) & boutposts))
        p->scoremg -= weak_square_mg;
    }
    if(doubled)
    { p->scoremg -= doubled_penalty_mg[file];
      p->scoreeg -= doubled_penalty_eg[file];
    }
    if(isolated)
    { p->scoremg -= isolated_penalty_mg[file];
      p->scoreeg -= isolated_penalty_eg[file];
      if(!(bpawns & rook_backward[B][sq]))
        p->scoremg -= isolated_on_semi_op_mg;
    }
    if(connected)
    { p->scoremg += pawn_chain_element_mg[file];
      p->scoreeg += pawn_chain_element_eg[file];
    }
    
    if(passed)
    { p->passers |= (1ULL << sq);
      p->scoremg += passer_bonus_mg[W][rank];
     if(connected) p->scoremg += connected_passer_mg[W][rank];
    }
    else
    { if(!(bpawns & rook_backward[B][sq]))
      { if((popcnt(wpawns & backward_mask[W][sq])) >= \
           (popcnt(bpawns & backward_mask[B][sq+8]))){
              p->scoremg += candidate_mg[W][rank];
              p->scoreeg += candidate_eg[W][rank];
        }
      }
    }
  }
  
  t = pos->occ[BP];
  while(t)
  {
    sq = bitscanf(t);
    bitclear(t, sq);
    file = File(sq);
    rank = Rank(sq);

    //psq_values:
    p->scoremg -= psq_pawn_mg[B][sq];
    
    //collecting the pawn type info:
    doubled   = (true && (bpawns & rook_backward[W][sq]));
    isolated  = (true && (!(bpawns & isolated_mask[file])));
    connected = (true && (bpawns & connected_mask[B][sq]));
    passed    = (!doubled && (!(wpawns & passer_mask[B][sq])));
    backward  = (!isolated && !connected && !passed && (!(bpawns & backward_mask[B][sq])));
    
    if(backward)
    { front = rook_backward[W][sq];
      backward = false;
      while(front)
      { atk_sq = bitscanr(front);
        sq_mask = (1ULL << atk_sq);
        if((sq_mask & (wpawns | bpawns)) 
        || (sq_mask & p->attacks[B]))
          break;
        if(sq_mask & p->attacks[W])
        { backward = true;
          break;
        }
        bitclear(front, atk_sq);
      }
    }
    //penalties and bonuses:        
    if(backward)
    { p->scoremg += backward_penalty_mg[file];
      p->scoreeg += backward_penalty_eg[file];
      if(!(wpawns & rook_backward[W][sq]))
        p->scoremg += backward_on_semi_op_mg;
        
      if(((1ULL << (sq-8)) & p->attacks[W])
      && ((1ULL << (sq-8)) & woutposts))
        p->scoremg += weak_square_mg;
    }
    if(doubled)
    { p->scoremg += doubled_penalty_mg[file];
      p->scoreeg += doubled_penalty_eg[file];
    }

    if(isolated)
    { p->scoremg += isolated_penalty_mg[file];
      p->scoreeg += isolated_penalty_eg[file];
      if(!(wpawns & rook_backward[W][sq]))
        p->scoremg += isolated_on_semi_op_mg;
    }
    if(connected)
    { p->scoremg -= pawn_chain_element_mg[file];
      p->scoreeg -= pawn_chain_element_eg[file];
    }
    if(passed)
    { p->passers |= (1ULL << sq);
      p->scoremg -= passer_bonus_mg[B][rank];
      if(connected) p->scoremg -= connected_passer_mg[B][rank];
    }
    else
    { if(!(wpawns & rook_backward[W][sq]))
      { if((popcnt(bpawns & backward_mask[B][sq])) >= \
           (popcnt(wpawns & backward_mask[W][sq-8]))){
              p->scoremg -= candidate_mg[B][rank];
              p->scoreeg -= candidate_eg[B][rank];
        }
      }
    }
  }
  
  //saving the pawn shield/storms:
  pawn_shield_store(p);
  return (p);
}
Пример #23
0
    void BVH4iIntersector16Hybrid<LeafIntersector,ENABLE_COMPRESSED_BVH4I_NODES>::intersect(mic_i* valid_i, BVH4i* bvh, Ray16& ray16)
    {
      /* near and node stack */
      __aligned(64) mic_f   stack_dist[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node_single[3*BVH4i::maxDepth+1]; 

      /* load ray */
      const mic_m valid0     = *(mic_i*)valid_i != mic_i(0);
      const mic3f rdir16     = rcp_safe(ray16.dir);
      const mic3f org_rdir16 = ray16.org * rdir16;
      mic_f ray_tnear        = select(valid0,ray16.tnear,pos_inf);
      mic_f ray_tfar         = select(valid0,ray16.tfar ,neg_inf);
      const mic_f inf        = mic_f(pos_inf);
      
      /* allocate stack and push root node */
      stack_node[0] = BVH4i::invalidNode;
      stack_dist[0] = inf;
      stack_node[1] = bvh->root;
      stack_dist[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      mic_f*   __restrict__ sptr_dist = stack_dist + 2;
      
      const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
      const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr();

      while (1) pop:
      {
        /* pop next node from stack */
        NodeRef curNode = *(sptr_node-1);
        mic_f curDist   = *(sptr_dist-1);
        sptr_node--;
        sptr_dist--;
	const mic_m m_stackDist = ray_tfar > curDist;

	/* stack emppty ? */
        if (unlikely(curNode == BVH4i::invalidNode))  break;
        
        /* cull node if behind closest hit point */
        if (unlikely(none(m_stackDist))) continue;
        
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////

	/* switch to single ray mode */
        if (unlikely(countbits(m_stackDist) <= BVH4i::hybridSIMDUtilSwitchThreshold)) 
	  {
	    float   *__restrict__ stack_dist_single = (float*)sptr_dist;
	    store16f(stack_dist_single,inf);

	    /* traverse single ray */	  	  
	    long rayIndex = -1;
	    while((rayIndex = bitscan64(rayIndex,m_stackDist)) != BITSCAN_NO_BIT_SET_64) 
	      {	    
		stack_node_single[0] = BVH4i::invalidNode;
		stack_node_single[1] = curNode;
		size_t sindex = 2;

		const mic_f org_xyz      = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z);
		const mic_f dir_xyz      = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z);
		const mic_f rdir_xyz     = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z);
		const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
		const mic_f min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]);
		mic_f       max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]);

		const unsigned int leaf_mask = BVH4I_LEAF_MASK;

		while (1) 
		  {
		    NodeRef curNode = stack_node_single[sindex-1];
		    sindex--;
            
		    traverse_single_intersect<ENABLE_COMPRESSED_BVH4I_NODES>(curNode,
									     sindex,
									     rdir_xyz,
									     org_rdir_xyz,
									     min_dist_xyz,
									     max_dist_xyz,
									     stack_node_single,
									     stack_dist_single,
									     nodes,
									     leaf_mask);
	    

		    /* return if stack is empty */
		    if (unlikely(curNode == BVH4i::invalidNode)) break;


		    /* intersect one ray against four triangles */

		    const bool hit = LeafIntersector::intersect(curNode,
								rayIndex,
								dir_xyz,
								org_xyz,
								min_dist_xyz,
								max_dist_xyz,
								ray16,
								accel,
								(Scene*)bvh->geometry);

		    if (hit)
		      compactStack(stack_node_single,stack_dist_single,sindex,max_dist_xyz);
		  }
	      }
	    ray_tfar = select(valid0,ray16.tfar ,neg_inf);
	    continue;
	  }

	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////

	const unsigned int leaf_mask = BVH4I_LEAF_MASK;

	const mic3f org = ray16.org;
	const mic3f dir = ray16.dir;

        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(curNode.isLeaf(leaf_mask))) break;
          
          STAT3(normal.trav_nodes,1,popcnt(ray_tfar > curDist),16);
          const Node* __restrict__ const node = curNode.node(nodes);

	  prefetch<PFHINT_L1>((mic_f*)node + 0); 
	  prefetch<PFHINT_L1>((mic_f*)node + 1); 
          
          /* pop of next node */
          sptr_node--;
          sptr_dist--;
          curNode = *sptr_node; 
          curDist = *sptr_dist;
          

#pragma unroll(4)
          for (unsigned int i=0; i<4; i++)
          {
	    BVH4i::NodeRef child;
	    mic_f lclipMinX,lclipMinY,lclipMinZ;
	    mic_f lclipMaxX,lclipMaxY,lclipMaxZ;

	    if (!ENABLE_COMPRESSED_BVH4I_NODES)
	      {
		child = node->lower[i].child;

		lclipMinX = msub(node->lower[i].x,rdir16.x,org_rdir16.x);
		lclipMinY = msub(node->lower[i].y,rdir16.y,org_rdir16.y);
		lclipMinZ = msub(node->lower[i].z,rdir16.z,org_rdir16.z);
		lclipMaxX = msub(node->upper[i].x,rdir16.x,org_rdir16.x);
		lclipMaxY = msub(node->upper[i].y,rdir16.y,org_rdir16.y);
		lclipMaxZ = msub(node->upper[i].z,rdir16.z,org_rdir16.z);
	      }
	    else
	      {
		BVH4i::QuantizedNode* __restrict__ const compressed_node = (BVH4i::QuantizedNode*)node;
		child = compressed_node->child(i);

		const mic_f startXYZ = compressed_node->decompress_startXYZ();
		const mic_f diffXYZ  = compressed_node->decompress_diffXYZ();
		const mic_f clower   = compressed_node->decompress_lowerXYZ(startXYZ,diffXYZ);
		const mic_f cupper   = compressed_node->decompress_upperXYZ(startXYZ,diffXYZ);

		lclipMinX = msub(mic_f(clower[4*i+0]),rdir16.x,org_rdir16.x);
		lclipMinY = msub(mic_f(clower[4*i+1]),rdir16.y,org_rdir16.y);
		lclipMinZ = msub(mic_f(clower[4*i+2]),rdir16.z,org_rdir16.z);
		lclipMaxX = msub(mic_f(cupper[4*i+0]),rdir16.x,org_rdir16.x);
		lclipMaxY = msub(mic_f(cupper[4*i+1]),rdir16.y,org_rdir16.y);
		lclipMaxZ = msub(mic_f(cupper[4*i+2]),rdir16.z,org_rdir16.z);		
	      }
	    
	    if (unlikely(i >=2 && child == BVH4i::invalidNode)) break;

            const mic_f lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const mic_f lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const mic_m lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);   
	    const mic_f childDist = select(lhit,lnearP,inf);
            const mic_m m_child_dist = childDist < curDist;


            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              sptr_node++;
              sptr_dist++;

              /* push cur node onto stack and continue with hit child */
              if (any(m_child_dist))
              {
                *(sptr_node-1) = curNode;
                *(sptr_dist-1) = curDist; 
                curDist = childDist;
                curNode = child;
              }              
              /* push hit child onto stack*/
              else 
		{
		  *(sptr_node-1) = child;
		  *(sptr_dist-1) = childDist; 

		  const char* __restrict__ const pnode = (char*)child.node(nodes);             
		  prefetch<PFHINT_L2>(pnode + 0);
		  prefetch<PFHINT_L2>(pnode + 64);
		}
              assert(sptr_node - stack_node < BVH4i::maxDepth);
            }	      
          }
#if SWITCH_ON_DOWN_TRAVERSAL == 1
	  const mic_m curUtil = ray_tfar > curDist;
	  if (unlikely(countbits(curUtil) <= BVH4i::hybridSIMDUtilSwitchThreshold))
	    {
	      *sptr_node++ = curNode;
	      *sptr_dist++ = curDist; 
	      goto pop;
	    }
#endif
        }
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4i::invalidNode)) break;
        

        /* intersect leaf */
        const mic_m m_valid_leaf = ray_tfar > curDist;
        STAT3(normal.trav_leaves,1,popcnt(m_valid_leaf),16);

	LeafIntersector::intersect16(curNode,m_valid_leaf,dir,org,ray16,accel,(Scene*)bvh->geometry);

        ray_tfar = select(m_valid_leaf,ray16.tfar,ray_tfar);
      }
Пример #24
0
// Generate a return statement.
//
void GenerateReturn(SYM *sym, Statement *stmt)
{
	AMODE *ap;
	int nn;
	int lab1;
	int cnt;

	// Generate code to evaluate the return expression.
    if( stmt != NULL && stmt->exp != NULL )
	{
		initstack();
		ap = GenerateExpression(stmt->exp,F_ALL & ~F_BREG,8);
		// Force return value into register 1
		if( ap->preg != 1 ) {
			if (ap->mode == am_immed)
				GenerateDiadic(op_ldi, 0, makereg(1),ap);
			else if (ap->mode == am_reg)
				GenerateDiadic(op_mov, 0, makereg(1),ap);
			else
				GenerateDiadic(op_lw,0,makereg(1),ap);
		}
	}

	// Generate the return code only once. Branch to the return code for all returns.
	if( retlab == -1 )
    {
		retlab = nextlabel++;
		GenerateLabel(retlab);
		// Unlock any semaphores that may have been set
		for (nn = lastsph - 1; nn >= 0; nn--)
			GenerateDiadic(op_sb,0,makereg(0),make_string(semaphores[nn]));
		if (sym->IsNocall)	// nothing to do for nocall convention
			return;
		// Restore registers used as register variables.
		if( bsave_mask != 0 ) {
			cnt = (bitsset(bsave_mask)-1)*8;
			for (nn = 15; nn >=1 ; nn--) {
				if (bsave_mask & (1 << nn)) {
					GenerateDiadic(op_ss|op_lws,0,makebreg(nn),make_indexed(cnt,SP));
					cnt -= 8;
				}
			}
			GenerateTriadic(op_addui,0,makereg(SP),makereg(SP),make_immed(popcnt(bsave_mask)*8));
		}
		if( save_mask != 0 ) {
			cnt = (bitsset(save_mask)-1)*8;
			for (nn = 31; nn >=1 ; nn--) {
				if (save_mask & (1 << nn)) {
					GenerateDiadic(op_ss|op_lw,0,makereg(nn),make_indexed(cnt,SP));
					cnt -= 8;
				}
			}
			GenerateTriadic(op_addui,0,makereg(SP),makereg(SP),make_immed(popcnt(save_mask)*8));
		}
		// Unlink the stack
		// For a leaf routine the link register and exception link register doesn't need to be saved/restored.
		if (lc_auto || sym->NumParms > 0) {
			GenerateDiadic(op_mov,0,makereg(SP),makereg(regBP));
			GenerateDiadic(op_ss|op_lw,0,makereg(regBP),make_indirect(regSP));
		}
		if (!sym->IsLeaf) {
			if (exceptions)
				GenerateDiadic(op_ss|op_lws,0,makebreg(CLR),make_indexed(8,regSP));
			GenerateDiadic(op_ss|op_lws,0,makebreg(LR),make_indexed(16,regSP));
//			if (sym->UsesPredicate)
		}
		GenerateDiadic(op_ss|op_lws,0,make_string("pregs"),make_indexed(24,regSP));
		//if (isOscall) {
		//	GenerateDiadic(op_move,0,makereg(0),make_string("_TCBregsave"));
		//	gen_regrestore();
		//}
		// Generate the return instruction. For the Pascal calling convention pop the parameters
		// from the stack.
		if (sym->IsInterrupt) {
			//GenerateTriadic(op_addui,0,makereg(30),makereg(30),make_immed(24));
			//GenerateDiadic(op_lm,0,make_indirect(30),make_mask(0x9FFFFFFE));
			//GenerateTriadic(op_addui,0,makereg(30),makereg(30),make_immed(popcnt(0x9FFFFFFE)*8));
			GenerateMonadic(op_rti,0,(AMODE *)NULL);
			return;
		}
		if (sym->IsPascal) {
			GenerateTriadic(op_addui,0,makereg(regSP),makereg(regSP),make_immed(32+sym->NumParms * 8));
			GenerateMonadic(op_rts,0,(AMODE *)NULL);
		}
		else {
			GenerateTriadic(op_addui,0,makereg(regSP),makereg(regSP),make_immed(32));
			GenerateMonadic(op_rts,0,(AMODE*)NULL);
		}
    }
	// Just branch to the already generated stack cleanup code.
	else {
		GenerateMonadic(op_bra,0,make_clabel(retlab));
	}
}
Пример #25
0
// Generate function epilog code.
//
void GenerateEpilog(SYM *sym)
{
	AMODE *ap;
	int nn;
	int lab1;
	int cnt;

	// Generate the return code only once. Branch to the return code for all returns.
	GenerateLabel(retlab);
	// Unlock any semaphores that may have been set
	for (nn = lastsph - 1; nn >= 0; nn--)
		GenerateDiadic(op_sb,0,makereg(0),make_string(semaphores[nn]));
	if (sym->IsNocall)	// nothing to do for nocall convention
		return;
	// Restore registers used as register variables.
	if( bsave_mask != 0 ) {
		cnt = (bitsset(bsave_mask)-1)*8;
		for (nn = 15; nn >=1 ; nn--) {
			if (bsave_mask & (1 << nn)) {
				GenerateDiadic(op_lws,0,makebreg(nn),make_indexed(cnt,regSP));
				cnt -= 8;
			}
		}
		GenerateTriadic(op_addui,0,makereg(SP),makereg(regSP),make_immed(popcnt(bsave_mask)*8));
	}
	if( save_mask != 0 ) {
		cnt = (bitsset(save_mask)-1)*8;
		for (nn = 31; nn >=1 ; nn--) {
			if (save_mask & (1 << nn)) {
				GenerateDiadic(op_lw,0,makereg(nn),make_indexed(cnt,regSP));
				cnt -= 8;
			}
		}
		GenerateTriadic(op_addui,0,makereg(regSP),makereg(regSP),make_immed(popcnt(save_mask)*8));
	}
	// Unlink the stack
	// For a leaf routine the link register and exception link register doesn't need to be saved/restored.
	if (lc_auto || sym->NumParms > 0) {
		GenerateDiadic(op_mov,0,makereg(regSP),makereg(regBP));
		GenerateDiadic(op_lw,0,makereg(regBP),make_indirect(regSP));
	}
	if (!sym->IsLeaf) {
		if (exceptions)
			GenerateDiadic(op_lws,0,makebreg(regXLR),make_indexed(8,regSP));        // 11=CLR
		GenerateDiadic(op_lws,0,makebreg(regLR),make_indexed(16,regSP));            // 1 = LR
//			if (sym->UsesPredicate)
	}
	GenerateDiadic(op_lws,0,make_string("pregs"),make_indexed(24,regSP));
	GenerateDiadic(op_lw,0,makereg(regCLP),make_indexed(32,regSP));
	    if (sym->epilog) {
           if (optimize)
              opt1(sym->epilog);
	       GenerateStatement(sym->epilog);
	       return;
       }
	//if (isOscall) {
	//	GenerateDiadic(op_move,0,makereg(0),make_string("_TCBregsave"));
	//	gen_regrestore();
	//}
	// Generate the return instruction. For the Pascal calling convention pop the parameters
	// from the stack.
	if (sym->IsInterrupt) {
		//GenerateTriadic(op_addui,0,makereg(30),makereg(30),make_immed(24));
		//GenerateDiadic(op_lm,0,make_indirect(30),make_mask(0x9FFFFFFE));
		//GenerateTriadic(op_addui,0,makereg(30),makereg(30),make_immed(popcnt(0x9FFFFFFE)*8));
		GenerateMonadic(op_rti,0,(AMODE *)NULL);
		return;
	}
	if (sym->IsPascal) {
		GenerateTriadic(op_addui,0,makereg(regSP),makereg(regSP),make_immed(GetReturnBlockSize()+sym->NumParms * 8));
		GenerateMonadic(op_rts,0,(AMODE *)NULL);
	}
	else {
		GenerateTriadic(op_addui,0,makereg(regSP),makereg(regSP),make_immed(GetReturnBlockSize()));
		GenerateMonadic(op_rts,0,(AMODE*)NULL);
	}
}
Пример #26
0
  void BVH4Intersector4Chunk<TriangleIntersector4>::intersect(const BVH4Intersector4Chunk* This, Ray4& ray, const __m128 valid_i)
  {
    sseb valid = valid_i;
    NodeRef invalid = (NodeRef)1;
    const BVH4* bvh = This->bvh;
    STAT3(normal.travs,1,popcnt(valid),4);

    /* load ray into registers */
    ssef ray_near = select(valid,ray.tnear,pos_inf);
    ssef ray_far  = select(valid,ray.tfar ,neg_inf);
    sse3f rdir = rcp_safe(ray.dir);
    ray.tfar = ray_far;

    /* allocate stack and push root node */
    NodeRef stack_node[3*BVH4::maxDepth+1];
    ssef  stack_near[3*BVH4::maxDepth+1];
    stack_node[0] = invalid;
    stack_near[0] = ssef(inf);
    stack_node[1] = bvh->root;
    stack_near[1] = ray_near;
    NodeRef* sptr_node = stack_node+2;
    ssef * sptr_near = stack_near+2;
 
    while (1)
    {
      /* pop next node from stack */
      sptr_node--;
      sptr_near--;
      ssef  curDist = *sptr_near;
      NodeRef curNode = *sptr_node;
      if (unlikely(curNode == invalid))
        break;

      /* cull node if behind closest hit point */
      const sseb m_dist = curDist < ray_far;
      if (unlikely(none(m_dist))) 
        continue;

      while (1)
      {
        /* test if this is a leaf node */
        if (unlikely(curNode.isLeaf())) 
          break;

        STAT3(normal.trav_nodes,1,popcnt(valid),4);
        
        const Node* const node = curNode.node(bvh->nodePtr()); //NodeRef(curNode).node(nodes);
        //prefetch<PFHINT_L1>((ssef*)node + 1); // depth first order prefetch	
        
        /* pop of next node */
        sptr_node--;
        sptr_near--;
        curNode = *sptr_node;
        curDist = *sptr_near;
                
        for (unsigned i=0;i<4;i++)
	{
          const ssef dminx = (ssef(node->lower_x[i]) - ray.org.x) * rdir.x;
          const ssef dmaxx = (ssef(node->upper_x[i]) - ray.org.x) * rdir.x;
          const ssef dminy = (ssef(node->lower_y[i]) - ray.org.y) * rdir.y;
          const ssef dmaxy = (ssef(node->upper_y[i]) - ray.org.y) * rdir.y;
          const ssef dminz = (ssef(node->lower_z[i]) - ray.org.z) * rdir.z;
          const ssef dmaxz = (ssef(node->upper_z[i]) - ray.org.z) * rdir.z;
          
          const ssef dlowerx = min(dminx,dmaxx);
          const ssef dupperx = max(dminx,dmaxx);
          const ssef dlowery = min(dminy,dmaxy);
          const ssef duppery = max(dminy,dmaxy);
          const ssef dlowerz = min(dminz,dmaxz);
          const ssef dupperz = max(dminz,dmaxz);
          
          const ssef near = max(dlowerx,dlowery,dlowerz,ray_near);
          const ssef far  = min(dupperx,duppery,dupperz,ray_far );
          const sseb mhit = near <= far;
          
          const ssef childDist = select(mhit,near,inf);
          const sseb closer = childDist < curDist;

          /* if we hit the child we choose to continue with that child if it 
             is closer than the current next child, or we push it onto the stack */
          if (likely(any(mhit)))
          {
            const NodeRef child = node->child(i);
            //if (child != invalid)
            {
              sptr_node++;
              sptr_near++;
            
              /* push cur node onto stack and continue with hit child */
              if (any(closer)) {
                *(sptr_node-1) = curNode;
                *(sptr_near-1) = curDist; 
                curDist = childDist;
                curNode = child;
              } 
              
              /* push hit child onto stack*/
              else {
                *(sptr_node-1) = child;
                *(sptr_near-1) = childDist; 
              }          
            }	      
          }
        }
      }
      
      /* return if stack is empty */
      if (unlikely(curNode == invalid)) 
        break;
      
      /* decode leaf node */
      size_t num;
      STAT3(normal.trav_leaves,1,popcnt(valid),4);
      Triangle* tri = (Triangle*) curNode.leaf(bvh->triPtr(),num);
      
      /* intersect triangles */
      for (size_t i=0; i<num; i++)
        TriangleIntersector4::intersect(valid,ray,tri[i],bvh->vertices);
      
      ray_far = ray.tfar;
    }
  }
Пример #27
0
    void BVH8Intersector8Chunk<PrimitiveIntersector8>::intersect(avxb* valid_i, BVH8* bvh, Ray8& ray)
    {
#if defined(__AVX__)
      
      /* load ray */
      const avxb valid0 = *valid_i;
      const avx3f rdir = rcp_safe(ray.dir);
      const avx3f org_rdir = ray.org * rdir;
      avxf ray_tnear = select(valid0,ray.tnear,pos_inf);
      avxf ray_tfar  = select(valid0,ray.tfar ,neg_inf);
      const avxf inf = avxf(pos_inf);
      Precalculations pre(valid0,ray);
      
      /* allocate stack and push root node */
      avxf    stack_near[3*BVH8::maxDepth+1];
      NodeRef stack_node[3*BVH8::maxDepth+1];
      stack_node[0] = BVH8::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      avxf*    __restrict__ sptr_near = stack_near + 2;
      
      while (1)
      {
        /* pop next node from stack */
        sptr_node--;
        sptr_near--;
        NodeRef cur = *sptr_node;
        if (unlikely(cur == BVH8::invalidNode)) 
          break;
        
        /* cull node if behind closest hit point */
        avxf curDist = *sptr_near;
        if (unlikely(none(ray_tfar > curDist))) 
          continue;
        
        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(cur.isLeaf()))
            break;
          
          const avxb valid_node = ray_tfar > curDist;
          STAT3(normal.trav_nodes,1,popcnt(valid_node),8);
          const Node* __restrict__ const node = (BVH8::Node*)cur.node();
          
          /* pop of next node */
          sptr_node--;
          sptr_near--;
          cur = *sptr_node; // FIXME: this trick creates issues with stack depth
          curDist = *sptr_near;
          
          for (unsigned i=0; i<BVH8::N; i++)
          {
            const NodeRef child = node->children[i];
            if (unlikely(child == BVH8::emptyNode)) break;
            
#if defined(__AVX2__)
            const avxf lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
            const avxf lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
            const avxf lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
            const avxf lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
            const avxf lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
            const avxf lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
            const avxf lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
            const avxf lfarP  = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
            const avxb lhit   = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);      
#else
            const avxf lclipMinX = node->lower_x[i] * rdir.x - org_rdir.x;
            const avxf lclipMinY = node->lower_y[i] * rdir.y - org_rdir.y;
            const avxf lclipMinZ = node->lower_z[i] * rdir.z - org_rdir.z;
            const avxf lclipMaxX = node->upper_x[i] * rdir.x - org_rdir.x;
            const avxf lclipMaxY = node->upper_y[i] * rdir.y - org_rdir.y;
            const avxf lclipMaxZ = node->upper_z[i] * rdir.z - org_rdir.z;
            const avxf lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const avxf lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const avxb lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);      
#endif
            
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              const avxf childDist = select(lhit,lnearP,inf);
              const NodeRef child = node->children[i];
              
              /* push cur node onto stack and continue with hit child */
              if (any(childDist < curDist))
              {
                *sptr_node = cur;
                *sptr_near = curDist; 
		sptr_node++;
		sptr_near++;

                curDist = childDist;
                cur = child;
              }
              
              /* push hit child onto stack*/
              else {
                *sptr_node = child;
                *sptr_near = childDist; 
		sptr_node++;
		sptr_near++;

              }
              assert(sptr_node - stack_node < BVH8::maxDepth);
            }	      
          }
        }
        
        /* return if stack is empty */
        if (unlikely(cur == BVH8::invalidNode)) 
          break;
        
        /* intersect leaf */
	assert(cur != BVH8::emptyNode);
        const avxb valid_leaf = ray_tfar > curDist;
        STAT3(normal.trav_leaves,1,popcnt(valid_leaf),8);
        size_t items; const Triangle* tri  = (Triangle*) cur.leaf(items);
        PrimitiveIntersector8::intersect(valid_leaf,pre,ray,tri,items,bvh->geometry);
        ray_tfar = select(valid_leaf,ray.tfar,ray_tfar);
      }
      AVX_ZERO_UPPER();
#endif       
    }
Пример #28
0
    void BVH8Intersector8Hybrid<PrimitiveIntersector8>::occluded(bool8* valid_i, BVH8* bvh, Ray8& ray)
    {
      /* load ray */
      const bool8 valid = *valid_i;
      bool8 terminated = !valid;
      Vec3f8 ray_org = ray.org, ray_dir = ray.dir;
      float8 ray_tnear = ray.tnear, ray_tfar  = ray.tfar;
      const Vec3f8 rdir = rcp_safe(ray_dir);
      const Vec3f8 org(ray_org), org_rdir = org * rdir;
      ray_tnear = select(valid,ray_tnear,float8(pos_inf));
      ray_tfar  = select(valid,ray_tfar ,float8(neg_inf));
      const float8 inf = float8(pos_inf);
      Precalculations pre(valid,ray);

      /* compute near/far per ray */
      Vec3i8 nearXYZ;
      nearXYZ.x = select(rdir.x >= 0.0f,int8(0*(int)sizeof(float8)),int8(1*(int)sizeof(float8)));
      nearXYZ.y = select(rdir.y >= 0.0f,int8(2*(int)sizeof(float8)),int8(3*(int)sizeof(float8)));
      nearXYZ.z = select(rdir.z >= 0.0f,int8(4*(int)sizeof(float8)),int8(5*(int)sizeof(float8)));

      /* allocate stack and push root node */
      float8    stack_near[stackSizeChunk];
      NodeRef stack_node[stackSizeChunk];
      stack_node[0] = BVH8::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* stackEnd = stack_node+stackSizeChunk;
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      float8*    __restrict__ sptr_near = stack_near + 2;

      while (1)
      {
        /* pop next node from stack */
        assert(sptr_node > stack_node);
        sptr_node--;
        sptr_near--;
        NodeRef cur = *sptr_node;
        if (unlikely(cur == BVH8::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }

        /* cull node if behind closest hit point */
        float8 curDist = *sptr_near;
        const bool8 active = curDist < ray_tfar;
        if (unlikely(none(active))) 
          continue;
        
        /* switch to single ray traversal */
#if !defined(__WIN32__) || defined(__X86_64__)
        size_t bits = movemask(active);
        if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) {
          for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) {
            if (occluded1(bvh,cur,i,pre,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar,nearXYZ))
              terminated[i] = -1;
          }
          if (all(terminated)) break;
          ray_tfar = select(terminated,float8(neg_inf),ray_tfar);
          continue;
        }
#endif
                
        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(cur.isLeaf()))
            break;
          
          const bool8 valid_node = ray_tfar > curDist;
          STAT3(shadow.trav_nodes,1,popcnt(valid_node),8);
          const Node* __restrict__ const node = (Node*)cur.node();
          
          /* pop of next node */
          assert(sptr_node > stack_node);
          sptr_node--;
          sptr_near--;
          cur = *sptr_node;
          curDist = *sptr_near;
          
          for (unsigned i=0; i<BVH8::N; i++)
          {
            const NodeRef child = node->children[i];
            if (unlikely(child == BVH8::emptyNode)) break;
            
#if defined(__AVX2__)
            const float8 lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
            const float8 lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
            const float8 lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
            const float8 lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
            const float8 lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
            const float8 lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
            const float8 lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
            const float8 lfarP  = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
            const bool8 lhit   = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);      
#else
            const float8 lclipMinX = (node->lower_x[i] - org.x) * rdir.x;
            const float8 lclipMinY = (node->lower_y[i] - org.y) * rdir.y;
            const float8 lclipMinZ = (node->lower_z[i] - org.z) * rdir.z;
            const float8 lclipMaxX = (node->upper_x[i] - org.x) * rdir.x;
            const float8 lclipMaxY = (node->upper_y[i] - org.y) * rdir.y;
            const float8 lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z;
            const float8 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const float8 lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const bool8 lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);      
#endif
            
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              assert(sptr_node < stackEnd);
              assert(child != BVH8::emptyNode);
              const float8 childDist = select(lhit,lnearP,inf);
              sptr_node++;
              sptr_near++;
              
              /* push cur node onto stack and continue with hit child */
              if (any(childDist < curDist))
              {
                *(sptr_node-1) = cur;
                *(sptr_near-1) = curDist; 
                curDist = childDist;
                cur = child;
              }
              
              /* push hit child onto stack */
              else {
                *(sptr_node-1) = child;
                *(sptr_near-1) = childDist; 
              }
            }	      
          }
        }
        
        /* return if stack is empty */
        if (unlikely(cur == BVH8::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }
        
        /* intersect leaf */
	assert(cur != BVH8::emptyNode);
        const bool8 valid_leaf = ray_tfar > curDist;
        STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8);
        size_t items; const Triangle* prim = (Triangle*) cur.leaf(items);
        terminated |= PrimitiveIntersector8::occluded(!terminated,pre,ray,prim,items,bvh->scene);
        if (all(terminated)) break;
        ray_tfar = select(terminated,float8(neg_inf),ray_tfar);
      }
      store8i(valid & terminated,&ray.geomID,0);
      AVX_ZERO_UPPER();
    }
    void BVH4mbIntersector16Hybrid<LeafIntersector>::intersect(int16* valid_i, BVH4mb* bvh, Ray16& ray16)
    {
      /* near and node stack */
      __aligned(64) float16   stack_dist[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node_single[3*BVH4i::maxDepth+1]; 

      /* load ray */
      const bool16 valid0     = *(int16*)valid_i != int16(0);
      const Vec3f16 rdir16     = rcp_safe(ray16.dir);
      const Vec3f16 org_rdir16 = ray16.org * rdir16;
      float16 ray_tnear        = select(valid0,ray16.tnear,pos_inf);
      float16 ray_tfar         = select(valid0,ray16.tfar ,neg_inf);
      const float16 inf        = float16(pos_inf);
      
      /* allocate stack and push root node */
      stack_node[0] = BVH4i::invalidNode;
      stack_dist[0] = inf;
      stack_node[1] = bvh->root;
      stack_dist[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      float16*   __restrict__ sptr_dist = stack_dist + 2;
      
      const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
      const BVH4mb::Triangle01 * __restrict__ accel = (BVH4mb::Triangle01 *)bvh->triPtr();

      while (1) pop:
      {
        /* pop next node from stack */
        NodeRef curNode = *(sptr_node-1);
        float16 curDist   = *(sptr_dist-1);
        sptr_node--;
        sptr_dist--;
	const bool16 m_stackDist = ray_tfar > curDist;

	/* stack emppty ? */
        if (unlikely(curNode == BVH4i::invalidNode))  break;
        
        /* cull node if behind closest hit point */
        if (unlikely(none(m_stackDist))) continue;
        
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////

	/* switch to single ray mode */
        if (unlikely(countbits(m_stackDist) <= BVH4i::hybridSIMDUtilSwitchThreshold)) 
	  {
	    float   *__restrict__ stack_dist_single = (float*)sptr_dist;
	    store16f(stack_dist_single,inf);

	    /* traverse single ray */	  	  
	    long rayIndex = -1;
	    while((rayIndex = bitscan64(rayIndex,m_stackDist)) != BITSCAN_NO_BIT_SET_64) 
	      {	    
		stack_node_single[0] = BVH4i::invalidNode;
		stack_node_single[1] = curNode;
		size_t sindex = 2;

		const float16 org_xyz      = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z);
		const float16 dir_xyz      = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z);
		const float16 rdir_xyz     = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z);
		const float16 org_rdir_xyz = org_xyz * rdir_xyz;
		const float16 min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]);
		float16       max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]);
		const float16 time         = broadcast1to16f(&ray16.time[rayIndex]);

		const unsigned int leaf_mask = BVH4I_LEAF_MASK;

		while (1) 
		  {
		    NodeRef curNode = stack_node_single[sindex-1];
		    sindex--;
            
		    traverse_single_intersect(curNode,
					      sindex,
					      rdir_xyz,
					      org_rdir_xyz,
					      min_dist_xyz,
					      max_dist_xyz,
					      time,
					      stack_node_single,
					      stack_dist_single,
					      nodes,
					      leaf_mask);	    

		    /* return if stack is empty */
		    if (unlikely(curNode == BVH4i::invalidNode)) break;


		    /* intersect one ray against four triangles */
		    const bool hit = LeafIntersector::intersect(curNode,
								rayIndex,
								dir_xyz,
								org_xyz,
								min_dist_xyz,
								max_dist_xyz,
								ray16,
								accel,
								(Scene*)bvh->geometry);
		    
		    if (hit)
		      compactStack(stack_node_single,stack_dist_single,sindex,max_dist_xyz);

		  }	  
	      }
	    ray_tfar = select(valid0,ray16.tfar ,neg_inf);
	    continue;
	  }

	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////

	const unsigned int leaf_mask = BVH4I_LEAF_MASK;

	const float16 time     = ray16.time;
	const float16 one_time = (float16::one() - time);

        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(curNode.isLeaf(leaf_mask))) break;
          
          STAT3(normal.trav_nodes,1,popcnt(ray_tfar > curDist),16);
          const Node* __restrict__ const node = curNode.node(nodes);
          
          const BVH4mb::Node* __restrict__ const nodeMB = (BVH4mb::Node*)node;

          /* pop of next node */
          sptr_node--;
          sptr_dist--;
          curNode = *sptr_node; 
          curDist = *sptr_dist;
          
	  prefetch<PFHINT_L1>((char*)node + 0*64); 
	  prefetch<PFHINT_L1>((char*)node + 1*64); 
	  prefetch<PFHINT_L1>((char*)node + 2*64); 
	  prefetch<PFHINT_L1>((char*)node + 3*64); 

#pragma unroll(4)
          for (unsigned int i=0; i<4; i++)
          {
	    const NodeRef child = node->lower[i].child;

	    const float16 lower_x =  one_time * nodeMB->lower[i].x + time * nodeMB->lower_t1[i].x;
	    const float16 lower_y =  one_time * nodeMB->lower[i].y + time * nodeMB->lower_t1[i].y;
	    const float16 lower_z =  one_time * nodeMB->lower[i].z + time * nodeMB->lower_t1[i].z;
	    const float16 upper_x =  one_time * nodeMB->upper[i].x + time * nodeMB->upper_t1[i].x;
	    const float16 upper_y =  one_time * nodeMB->upper[i].y + time * nodeMB->upper_t1[i].y;
	    const float16 upper_z =  one_time * nodeMB->upper[i].z + time * nodeMB->upper_t1[i].z;

	    if (unlikely(i >=2 && child == BVH4i::invalidNode)) break;

            const float16 lclipMinX = msub(lower_x,rdir16.x,org_rdir16.x);
            const float16 lclipMinY = msub(lower_y,rdir16.y,org_rdir16.y);
            const float16 lclipMinZ = msub(lower_z,rdir16.z,org_rdir16.z);
            const float16 lclipMaxX = msub(upper_x,rdir16.x,org_rdir16.x);
            const float16 lclipMaxY = msub(upper_y,rdir16.y,org_rdir16.y);
            const float16 lclipMaxZ = msub(upper_z,rdir16.z,org_rdir16.z);
	    
            const float16 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const float16 lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const bool16 lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);   
	    const float16 childDist = select(lhit,lnearP,inf);
            const bool16 m_child_dist = childDist < curDist;
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              sptr_node++;
              sptr_dist++;
              
              /* push cur node onto stack and continue with hit child */
              if (any(m_child_dist))
              {
                *(sptr_node-1) = curNode;
                *(sptr_dist-1) = curDist; 
                curDist = childDist;
                curNode = child;
              }              
              /* push hit child onto stack*/
              else 
		{
		  *(sptr_node-1) = child;
		  *(sptr_dist-1) = childDist; 
		}
              assert(sptr_node - stack_node < BVH4i::maxDepth);
            }	      
          }
#if SWITCH_ON_DOWN_TRAVERSAL == 1
	  const bool16 curUtil = ray_tfar > curDist;
	  if (unlikely(countbits(curUtil) <= BVH4i::hybridSIMDUtilSwitchThreshold))
	    {
	      *sptr_node++ = curNode;
	      *sptr_dist++ = curDist; 
	      goto pop;
	    }
#endif

        }
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4i::invalidNode)) break;
        

        /* intersect leaf */
        const bool16 m_valid_leaf = ray_tfar > curDist;
        STAT3(normal.trav_leaves,1,popcnt(m_valid_leaf),16);

	LeafIntersector::intersect16(curNode,
				     m_valid_leaf,
				     ray16.dir,
				     ray16.org,
				     ray16,
				     accel,
				     (Scene*)bvh->geometry);


        ray_tfar = select(m_valid_leaf,ray16.tfar,ray_tfar);
      }
Пример #30
0
    void BVH4iIntersector16Chunk<LeafIntersector,ENABLE_COMPRESSED_BVH4I_NODES>::occluded(mic_i* valid_i, BVH4i* bvh, Ray16& ray)
    {
      /* allocate stack */
      __aligned(64) mic_f    stack_dist[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1];

      /* load ray */
      const mic_m valid = *(mic_i*)valid_i != mic_i(0);
      mic_m m_terminated = !valid;
      const mic3f rdir = rcp_safe(ray.dir);
      const mic3f org_rdir = ray.org * rdir;
      mic_f ray_tnear = select(valid,ray.tnear,pos_inf);
      mic_f ray_tfar  = select(valid,ray.tfar ,neg_inf);
      const mic_f inf = mic_f(pos_inf);
      
      /* push root node */
      stack_node[0] = BVH4i::invalidNode;
      stack_dist[0] = inf;
      stack_node[1] = bvh->root;
      stack_dist[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      mic_f*   __restrict__ sptr_dist = stack_dist + 2;
      
      const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
      const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr();

      const mic3f org = ray.org;
      const mic3f dir = ray.dir;

      while (1)
      {
	const mic_m m_active = !m_terminated;

        /* pop next node from stack */
        NodeRef curNode = *(sptr_node-1);
        mic_f curDist   = *(sptr_dist-1);
        sptr_node--;
        sptr_dist--;
	const mic_m m_stackDist = gt(m_active,ray_tfar,curDist);

	/* stack emppty ? */
        if (unlikely(curNode == BVH4i::invalidNode))  break;
        
        /* cull node if behind closest hit point */

        if (unlikely(none(m_stackDist))) { continue; }

	const unsigned int leaf_mask = BVH4I_LEAF_MASK;

	traverse_chunk_occluded<ENABLE_COMPRESSED_BVH4I_NODES>(curNode,
							       curDist,
							       rdir,
							       org_rdir,
							       ray_tnear,
							       ray_tfar,
							       m_active,
							       sptr_node,
							       sptr_dist,
							       nodes,
							       leaf_mask);            		    	
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4i::invalidNode)) break;
        
        /* intersect leaf */
        mic_m m_valid_leaf = gt(m_active,ray_tfar,curDist);
        STAT3(shadow.trav_leaves,1,popcnt(m_valid_leaf),16);

	LeafIntersector::occluded16(curNode,m_valid_leaf,dir,org,ray,m_terminated,accel,(Scene*)bvh->geometry);

        if (unlikely(all(m_terminated))) break;
        ray_tfar = select(m_terminated,neg_inf,ray_tfar);
      }
      store16i(valid & m_terminated,&ray.geomID,0);
    }