__m256 BVH2Intersector8Chunk<TriangleIntersector>::occluded(const BVH2Intersector8Chunk* This, Ray8& ray, const __m256 valid_i) { avxb valid = valid_i; avxb terminated = !valid; const BVH2* bvh = This->bvh; STAT3(shadow.travs,1,popcnt(valid),8); NodeRef stack[1+BVH2::maxDepth]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack; //!< current stack pointer NodeRef cur = bvh->root; //!< in cur we track the ID of the current node /* let inactive rays miss all boxes */ const avx3f rdir = rcp_safe(ray.dir); avxf rayFar = select(terminated,avxf(neg_inf),ray.tfar); while (true) { /*! downtraversal loop */ while (likely(cur.isNode())) { STAT3(normal.trav_nodes,1,popcnt(valid),8); /* intersect packet with box of both children */ const Node* node = cur.node(); const size_t hit0 = intersectBox(ray.org,rdir,ray.tnear,rayFar,node,0); const size_t hit1 = intersectBox(ray.org,rdir,ray.tnear,rayFar,node,1); /*! if two children are hit push both onto stack */ if (likely(hit0 != 0 && hit1 != 0)) { *stackPtr = node->child(0); stackPtr++; cur = node->child(1); } /*! if one child hit, continue with that child */ else { if (likely(hit0 != 0)) cur = node->child(0); else if (likely(hit1 != 0)) cur = node->child(1); else goto pop_node; } } /*! leaf node, intersect all triangles */ { STAT3(shadow.trav_leaves,1,popcnt(valid),8); size_t num; Triangle* tri = (Triangle*) cur.leaf(NULL,num); for (size_t i=0; i<num; i++) { terminated |= TriangleIntersector::occluded(valid,ray,tri[i],bvh->vertices); if (all(terminated)) return terminated; } /* let terminated rays miss all boxes */ rayFar = select(terminated,avxf(neg_inf),rayFar); } /*! pop next node from stack */ pop_node: if (unlikely(stackPtr == stack)) break; cur = *(--stackPtr); } return terminated; }
inline uint32_t select64(uint64_t v, size_t r) { assert(r <= 64); if (r > popcnt(v)) return 64; uint32_t pos = 0; uint32_t c = popcnt(uint32_t(v)); if (r > c) { r -= c; pos = 32; v >>= 32; }
int reset_for_buflen (unsigned int thread, int new_buflen) { // make sure new size is power of 2 if (popcnt (new_buflen) != 1) return -1; reset_buflen = TRUE; uni[thread].buflen = new_buflen; top[thread].jack.reset_size = new_buflen; safefree ((char *) top[thread].hold.buf.r); safefree ((char *) top[thread].hold.buf.l); safefree ((char *) top[thread].hold.aux.r); safefree ((char *) top[thread].hold.aux.l); destroy_workspace (thread); reset_buflen = FALSE; loc[thread].def.size = new_buflen; setup_workspace (loc[thread].def.rate, loc[thread].def.size, loc[thread].def.mode, loc[thread].path.wisdom, loc[thread].def.spec, loc[thread].def.nrx, loc[thread].def.size, thread); setup_local_audio (thread); reset_meters (thread); reset_spectrum (thread); reset_counters (thread); return 0; }
/* * The score is always computed as it was WHITE to play. * The same applies to all the heuristic functions. * * Notes: * - The quiescence search guarantees that there are no captures pending for the side playing. * - Contrary to other engines, we don't give a bonus to the side playing because he already has an advantage in mobility: * the opponent may have pending captures, and these are not giving him any bonuses. */ int evaluate(Position * const position) { int score; unsigned int game_phase; uint32 white = position->white(); uint32 black = position->black(); uint32 kings = position->kings(); player_t to_play = position->to_play(); uint32 w_men = white & ~kings; uint32 w_kings = white & kings; uint32 b_men = black & ~kings; uint32 b_kings = black & kings; uint32 occupied = white | black; uint32 empty = ~occupied; int n_w_men = popcnt(w_men); int n_w_kings = popcnt(w_kings); int n_b_men = popcnt(b_men); int n_b_kings = popcnt(b_kings); int n_pieces = n_w_men + n_w_kings + n_b_men + n_b_kings; game_phase = (n_pieces >= MIDDLE_GAME ? (n_pieces >= OPENING ? 0 : 1) : 2); // material score = (n_w_men - n_b_men) * MAN_VALUE[game_phase] + (n_w_kings - n_b_kings) * KING_VALUE[game_phase]; position->set_to_play(WHITE); score += w_mobility(w_men, w_kings, black, b_kings, empty, game_phase); position->set_to_play(BLACK); score -= b_mobility(b_men, b_kings, white, w_kings, empty, game_phase); position->set_to_play(to_play); score += back_rank(white, black, w_kings, b_kings, game_phase); score += triangle(w_men, b_men, w_kings, b_kings, game_phase); score += balance(white, black, game_phase); score += runaway(w_men, b_men, empty, game_phase); score += bridge_exploit(white, black, w_kings, b_kings, game_phase); score += diagonal_control(w_kings, b_kings, occupied, game_phase); return (to_play == WHITE ? score : -score); }
static int eval_black_king(int sq) { register int pawn_first; int score = 0; safety[B] = &empty[0]; if(!(board->castle & (BLACK_OO|BLACK_OOO))) { if(sq > F8 && sq <= H8) { pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_F])); score += (king_shield[B][pawn_first]) * 3; pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_G])); score += (king_shield[B][pawn_first]) * 2; pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_H])); score += (king_shield[B][pawn_first]); score -= popcnt(board->bb_pawns[W] & king_storm_mask[B]) * 4; safety[B] = &safety_kingside[B][0]; } else if(sq < D8 && sq >= A8) { pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_C])); score += (king_shield[B][pawn_first]) * 3; pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_B])); score += (king_shield[B][pawn_first]) * 2; pawn_first = calc_rank64(bitscanr(board->bb_pawns[B] & file_mask[FILE_A])); score += (king_shield[B][pawn_first]); score -= popcnt(board->bb_pawns[W] & queen_storm_mask[B]) * 4; safety[B] = &safety_queenside[B][0]; } else score -= KING_UNCASTLED_STILL; } else { score -= KING_UNCASTLED_YET; if(PieceType(sq - 16) != BP &&(PieceType(sq - 15) != BP || PieceType(sq - 17) != BP)) score -= KING_EXPOSED; } return score; }
static int eval_white_king(int sq) { register int pawn_first; int score = 0; safety[W] = &empty[0]; if(!(board->castle & (WHITE_OO|WHITE_OOO))) { if(sq > F1 && sq <= H1) { pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_F])); score += (king_shield[W][pawn_first]) * 3; pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_G])); score += (king_shield[W][pawn_first]) * 2; pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_H])); score += (king_shield[W][pawn_first]); score -= popcnt(board->bb_pawns[B] & king_storm_mask[W]) * 4; safety[W] = &safety_kingside[W][0]; } else if(sq < D1 && sq >= A1) { pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_C])); score += (king_shield[W][pawn_first]) * 3; pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_B])); score += (king_shield[W][pawn_first]) * 2; pawn_first = calc_rank64(bitscanf(board->bb_pawns[W] & file_mask[FILE_A])); score += (king_shield[W][pawn_first]); score -= popcnt(board->bb_pawns[B] & queen_storm_mask[W]) * 4; safety[W] = &safety_queenside[W][0]; } else score -= KING_UNCASTLED_STILL; } else { score -= KING_UNCASTLED_YET; if(PieceType(sq + 16) != WP &&(PieceType(sq + 15) != WP || PieceType(sq + 17) != WP)) score -= KING_EXPOSED; } return score; }
size_t hasMoreThen(size_t items){ size_t so_far = 0; for (size_t i = 0; i < words; i++){ if(data[i]){ so_far += popcnt(data[i]); if(so_far > items) return true; } } return false; }
uint64_t select1(uint64_t rank) const { uint64_t pos = 0; uint64_t crank; uint64_t i = 0; while ( rank >= (crank=popcnt(A[i])) ) { rank -= crank; pos += 64; i++; } uint64_t word = A[i]; if ( rank >= (crank=popcnt(word&0x00000000FFFFFFFFULL)) ) { rank -= crank; pos += 32; word >>= 32; }
long int fmap_find(const uint8_t *image, unsigned int image_len) { long int ret = -1; if ((image == NULL) || (image_len == 0)) return -1; if (popcnt(image_len) == 1) ret = fmap_bsearch(image, image_len); else ret = fmap_lsearch(image, image_len); return ret; }
void testmap(void) { pages = calloc(1, PAGES * sizeof(struct page)); if (!pages) exit(100); printf("simple tests\n"); #define MB ((1024*1024)/pagesize) setpol(0, PAGES, MPOL_INTERLEAVE, 3); setpol(0, MB, MPOL_BIND, 1); setpol(MB, MB, MPOL_BIND, 1); setpol(MB, MB, MPOL_DEFAULT, 0); setpol(MB, MB, MPOL_PREFERRED, 2); setpol(MB/2, MB, MPOL_DEFAULT, 0); setpol(MB+MB/2, MB, MPOL_BIND, 2); setpol(MB/2+100, 100, MPOL_PREFERRED, 1); setpol(100, 200, MPOL_PREFERRED, 1); printf("done\n"); for (;;) { unsigned long offset = random() % PAGES; int policy = random() % (MPOL_MAX); unsigned long nodes = random() % 4; long length = random() % (PAGES - offset); /* validate */ switch (policy) { case MPOL_DEFAULT: nodes = 0; break; case MPOL_INTERLEAVE: case MPOL_BIND: if (nodes == 0) continue; break; case MPOL_PREFERRED: if (popcnt(nodes) != 1) continue; break; } setpol(offset, length, policy, nodes); } }
uint64 gen_multiplier(int sq, bitboard_t mask, int bits, int *mbits, int *ibits, int rook) { int fail,b; int trials; int bit_trials; uint64 magic; uint64 index; bitboard_t x, blockers[4096], solution[4096], used[4096]; for(x = 0ULL; x < (1ULL << bits); x++) { blockers[x] = get_blockers(x, mask); solution[x] = (rook) ? (get_rook_atk(sq,blockers[x])) : (get_bishop_atk(sq, blockers[x])); } trials = 0; bit_trials = 1; for(;;) { ibits[sq] = 0; magic = rand64_1bits(bit_trials); for(x = 0ULL; x < (1ULL << bits); x++) used[x] = 0; fail = 0; for(x = 0ULL; x < (1ULL << bits); x++) { index = (blockers[x] * magic) >> (64 - bits); if(used[index] == 0) { used[index] = solution[x]; b = popcnt(blockers[x] * magic); if(ibits[sq] < b) ibits[sq] = b; } else if(used[index] != solution[x]) { fail = 1; break; } } if(!fail) { mbits[sq] = bit_trials; return magic; } trials++; if((trials > MAX_TRIALS) && (bit_trials < bits)) { bit_trials++; trials = 0; } } }
int Cdu::count_points_bitmaps (int nwords, unsigned *bmps, const vector<Window> & ws) { if(coords.size() == 1) { dimpair_t dp = coords[0]; const Window &w = ws[dp.win]; // 1-d, npoints = window.width * window.max npoints = w.width * w.max; } else { // multi-d, do real point counting // new variable for points, to satisfy OpenMP int local_npoints = 0; #pragma omp parallel for reduction(+:local_npoints) for(int iword = 0; iword < nwords; iword++) { unsigned word = ~0u; for(int icoord = 0; icoord < coords.size(); icoord++) word &= BMPS(coords[icoord].win, iword); local_npoints += popcnt(word); } // for(iword) npoints = local_npoints; } return npoints; } // count_points
void BVH4Intersector4Chunk<types,robust,PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray) { /* load ray */ const sseb valid0 = *valid_i; const sse3f rdir = rcp_safe(ray.dir); const sse3f org(ray.org), org_rdir = org * rdir; ssef ray_tnear = select(valid0,ray.tnear,ssef(pos_inf)); ssef ray_tfar = select(valid0,ray.tfar ,ssef(neg_inf)); const ssef inf = ssef(pos_inf); Precalculations pre(valid0,ray); /* allocate stack and push root node */ ssef stack_near[stackSize]; NodeRef stack_node[stackSize]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSize; NodeRef* __restrict__ sptr_node = stack_node + 2; ssef* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ ssef curDist = *sptr_near; if (unlikely(none(ray_tfar > curDist))) continue; while (1) { /* process normal nodes */ if (likely((types & 0x1) && cur.isNode())) { const sseb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = cur.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; ssef lnearP; const sseb lhit = node->intersect<robust>(i,org,rdir,org_rdir,ray_tnear,ray_tfar,lnearP); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); const ssef childDist = select(lhit,lnearP,inf); const NodeRef child = node->children[i]; assert(child != BVH4::emptyNode); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* process motion blur nodes */ else if (likely((types & 0x10) && cur.isNodeMB())) { const sseb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),8); const BVH4::NodeMB* __restrict__ const node = cur.nodeMB(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->child(i); if (unlikely(child == BVH4::emptyNode)) break; ssef lnearP; const sseb lhit = node->intersect(i,org,rdir,org_rdir,ray_tnear,ray_tfar,ray.time,lnearP); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4::emptyNode); const ssef childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } else break; }
void BVH4iIntersector4Chunk<TriangleIntersector4>::occluded(sseb* valid_i, BVH4i* bvh, Ray4& ray) { /* load node and primitive array */ const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr(); /* load ray */ const sseb valid = *valid_i; sseb terminated = !valid; const sse3f rdir = rcp_safe(ray.dir); const sse3f org_rdir = ray.org * rdir; ssef ray_tnear = select(valid,ray.tnear,pos_inf); ssef ray_tfar = select(valid,ray.tfar ,neg_inf); const ssef inf = ssef(pos_inf); /* allocate stack and push root node */ ssef stack_near[3*BVH4i::maxDepth+1]; NodeRef stack_node[3*BVH4i::maxDepth+1]; stack_node[0] = BVH4i::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* __restrict__ sptr_node = stack_node + 2; ssef* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ sptr_node--; sptr_near--; NodeRef curNode = *sptr_node; if (unlikely(curNode == BVH4i::invalidNode)) break; /* cull node if behind closest hit point */ ssef curDist = *sptr_near; if (unlikely(none(ray_tfar > curDist))) continue; while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf())) break; const sseb valid_node = ray_tfar > curDist; STAT3(shadow.trav_nodes,1,popcnt(valid_node),4); const Node* __restrict__ const node = curNode.node(nodes); /* pop of next node */ sptr_node--; sptr_near--; curNode = *sptr_node; // FIXME: this trick creates issues with stack depth curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<4; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4i::emptyNode)) break; #if defined(__AVX2__) const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const ssef lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const sseb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const ssef lclipMinX = node->lower_x[i] * rdir.x - org_rdir.x; const ssef lclipMinY = node->lower_y[i] * rdir.y - org_rdir.y; const ssef lclipMinZ = node->lower_z[i] * rdir.z - org_rdir.z; const ssef lclipMaxX = node->upper_x[i] * rdir.x - org_rdir.x; const ssef lclipMaxY = node->upper_y[i] * rdir.y - org_rdir.y; const ssef lclipMaxZ = node->upper_z[i] * rdir.z - org_rdir.z; const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const ssef lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const sseb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { const ssef childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = curNode; *(sptr_near-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack*/ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } assert(sptr_node - stack_node < BVH4i::maxDepth); } } } /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* intersect leaf */ const sseb valid_leaf = ray_tfar > curDist; STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),4); size_t items; const Triangle* tri = (Triangle*) curNode.leaf(accel, items); terminated |= TriangleIntersector4::occluded(!terminated,ray,tri,items,bvh->geometry); if (all(terminated)) break; ray_tfar = select(terminated,neg_inf,ray_tfar); } store4i(valid & terminated,&ray.geomID,0); AVX_ZERO_UPPER(); }
void BVH8Intersector16Chunk<PrimitiveIntersector16>::occluded(bool16* valid_i, BVH8* bvh, Ray16& ray) { #if defined(__AVX512__) /* load ray */ const bool16 valid = *valid_i; bool16 terminated = !valid; const Vec3f16 rdir = rcp_safe(ray.dir); const Vec3f16 org_rdir = ray.org * rdir; float16 ray_tnear = select(valid,ray.tnear,pos_inf); float16 ray_tfar = select(valid,ray.tfar ,neg_inf); const float16 inf = float16(pos_inf); Precalculations pre(valid,ray); /* allocate stack and push root node */ float16 stack_near[3*BVH8::maxDepth+1]; NodeRef stack_node[3*BVH8::maxDepth+1]; stack_node[0] = BVH8::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* __restrict__ sptr_node = stack_node + 2; float16* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH8::invalidNode)) break; /* cull node if behind closest hit point */ float16 curDist = *sptr_near; if (unlikely(none(ray_tfar > curDist))) continue; while (1) { /* test if this is a leaf node */ if (unlikely(cur.isLeaf())) break; const bool16 valid_node = ray_tfar > curDist; STAT3(shadow.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = (Node*)cur.node(); /* pop of next node */ sptr_node--; sptr_near--; cur = *sptr_node; // FIXME: this trick creates issues with stack depth curDist = *sptr_near; for (unsigned i=0; i<BVH8::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH8::emptyNode)) break; const float16 lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const float16 lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const float16 lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const float16 lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const float16 lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const float16 lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const float16 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const float16 lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const bool16 lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { const float16 childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack*/ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } assert(sptr_node - stack_node < BVH8::maxDepth); } } } /* return if stack is empty */ if (unlikely(cur == BVH8::invalidNode)) break; /* intersect leaf */ assert(cur != BVH8::emptyNode); const bool16 valid_leaf = ray_tfar > curDist; STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8); size_t items; const Triangle* tri = (Triangle*) cur.leaf(items); terminated |= PrimitiveIntersector16::occluded(!terminated,pre,ray,tri,items,bvh->scene); if (all(terminated)) break; ray_tfar = select(terminated,neg_inf,ray_tfar); } store16i(valid & terminated,&ray.geomID,0); AVX_ZERO_UPPER(); #endif }
void BVH2Intersector8Chunk<TriangleIntersector>::intersect(const BVH2Intersector8Chunk* This, Ray8& ray, const __m256 valid_i) { avxb valid = valid_i; const BVH2* bvh = This->bvh; STAT3(normal.travs,1,popcnt(valid),8); struct StackItem { NodeRef ptr; avxf dist; }; StackItem stack[1+BVH2::maxDepth]; //!< stack of nodes that still need to get traversed StackItem* stackPtr = stack; //!< current stack pointer NodeRef cur = bvh->root; //!< in cur we track the ID of the current node /* let inactive rays miss all boxes */ const avx3f rdir = rcp_safe(ray.dir); ray.tfar = select(valid,ray.tfar,avxf(neg_inf)); while (true) { /*! downtraversal loop */ while (likely(cur.isNode())) { STAT3(normal.trav_nodes,1,popcnt(valid),8); /* intersect packet with box of both children */ const Node* node = cur.node(); avxf dist0; size_t hit0 = intersectBox(ray.org,rdir,ray.tnear,ray.tfar,node,0,dist0); avxf dist1; size_t hit1 = intersectBox(ray.org,rdir,ray.tnear,ray.tfar,node,1,dist1); /*! if two children hit, push far node onto stack and continue with closer node */ if (likely(hit0 != 0 && hit1 != 0)) { if (any(valid & (dist0 < dist1))) { stackPtr->ptr = node->child(1); stackPtr->dist = dist1; stackPtr++; cur = node->child(0); } else { stackPtr->ptr = node->child(0); stackPtr->dist = dist0; stackPtr++; cur = node->child(1); } } /*! if one child hit, continue with that child */ else { if (likely(hit0 != 0)) cur = node->child(0); else if (likely(hit1 != 0)) cur = node->child(1); else goto pop_node; } } /*! leaf node, intersect all triangles */ { STAT3(normal.trav_leaves,1,popcnt(valid),8); size_t num; Triangle* tri = (Triangle*) cur.leaf(NULL,num); for (size_t i=0; i<num; i++) { TriangleIntersector::intersect(valid,ray,tri[i],bvh->vertices); } } /*! pop next node from stack */ pop_node: if (unlikely(stackPtr == stack)) break; --stackPtr; cur = stackPtr->ptr; if (unlikely(all(stackPtr->dist > ray.tfar))) goto pop_node; } }
void BVH4Intersector8Chunk<types, robust, PrimitiveIntersector8>::intersect(bool8* valid_i, BVH4* bvh, Ray8& ray) { /* verify correct input */ bool8 valid0 = *valid_i; #if defined(RTCORE_IGNORE_INVALID_RAYS) valid0 &= ray.valid(); #endif assert(all(valid0,ray.tnear > -FLT_MIN)); assert(!(types & BVH4::FLAG_NODE_MB) || all(valid0,ray.time >= 0.0f & ray.time <= 1.0f)); /* load ray */ const Vec3f8 rdir = rcp_safe(ray.dir); const Vec3f8 org(ray.org), org_rdir = org * rdir; float8 ray_tnear = select(valid0,ray.tnear,pos_inf); float8 ray_tfar = select(valid0,ray.tfar ,neg_inf); const float8 inf = float8(pos_inf); Precalculations pre(valid0,ray); /* allocate stack and push root node */ float8 stack_near[stackSize]; NodeRef stack_node[stackSize]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSize; NodeRef* __restrict__ sptr_node = stack_node + 2; float8* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ float8 curDist = *sptr_near; if (unlikely(none(ray_tfar > curDist))) continue; while (1) { /* process normal nodes */ if (likely((types & 0x1) && cur.isNode())) { const bool8 valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = cur.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; float8 lnearP; const bool8 lhit = intersect8_node<robust>(node,i,org,rdir,org_rdir,ray_tnear,ray_tfar,lnearP); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4::emptyNode); const float8 childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* process motion blur nodes */ else if (likely((types & 0x10) && cur.isNodeMB())) { const bool8 valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),8); const BVH4::NodeMB* __restrict__ const node = cur.nodeMB(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->child(i); if (unlikely(child == BVH4::emptyNode)) break; float8 lnearP; const bool8 lhit = intersect_node(node,i,org,rdir,org_rdir,ray_tnear,ray_tfar,ray.time,lnearP); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4::emptyNode); const float8 childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } else break; }
void BVH8iIntersector8Hybrid<TriangleIntersector8>::occluded(avxb* valid_i, BVH8i* bvh, Ray8& ray) { /* load ray */ const avxb valid = *valid_i; avxb terminated = !valid; avx3f ray_org = ray.org, ray_dir = ray.dir; avxf ray_tnear = ray.tnear, ray_tfar = ray.tfar; #if defined(__FIX_RAYS__) const avxf float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,avx3f(-float_range),avx3f(+float_range)); ray_dir = clamp(ray_dir,avx3f(-float_range),avx3f(+float_range)); ray_tnear = max(ray_tnear,FLT_MIN); ray_tfar = min(ray_tfar,float(inf)); #endif const avx3f rdir = rcp_safe(ray_dir); const avx3f org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid,ray_tnear,avxf(pos_inf)); ray_tfar = select(valid,ray_tfar ,avxf(neg_inf)); const avxf inf = avxf(pos_inf); /* compute near/far per ray */ avx3i nearXYZ; nearXYZ.x = select(rdir.x >= 0.0f,avxi(0*(int)sizeof(avxf)),avxi(1*(int)sizeof(avxf))); nearXYZ.y = select(rdir.y >= 0.0f,avxi(2*(int)sizeof(avxf)),avxi(3*(int)sizeof(avxf))); nearXYZ.z = select(rdir.z >= 0.0f,avxi(4*(int)sizeof(avxf)),avxi(5*(int)sizeof(avxf))); /* allocate stack and push root node */ avxf stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH4i::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; avxf* __restrict__ sptr_near = stack_near + 2; const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr(); while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef curNode = *sptr_node; if (unlikely(curNode == BVH4i::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ avxf curDist = *sptr_near; const avxb active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { if (occluded1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar,nearXYZ)) terminated[i] = -1; } if (all(terminated)) break; ray_tfar = select(terminated,avxf(neg_inf),ray_tfar); continue; } #endif while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf())) break; const avxb valid_node = ray_tfar > curDist; STAT3(shadow.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = (Node*)curNode.node(nodes); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; curNode = *sptr_node; curDist = *sptr_near; for (unsigned i=0; i<8; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4i::emptyNode)) break; #if defined(__AVX2__) const avxf lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const avxf lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const avxf lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const avxf lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const avxf lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const avxf lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const avxf lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const avxf lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const avxb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const avxf lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const avxf lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const avxf lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const avxf lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const avxf lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const avxf lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; const avxf lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const avxf lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const avxb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4i::emptyNode); const avxf childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = curNode; *(sptr_near-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ const avxb valid_leaf = ray_tfar > curDist; STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8); size_t items; const Triangle* prim = (Triangle*) curNode.leaf(accel,items); terminated |= TriangleIntersector8::occluded(!terminated,ray,prim,items,bvh->geometry); if (all(terminated)) break; ray_tfar = select(terminated,avxf(neg_inf),ray_tfar); } store8i(valid & terminated,&ray.geomID,0); AVX_ZERO_UPPER(); }
void BVH4Intersector4Hybrid<PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray) { /* load ray */ const sseb valid0 = *valid_i; sse3f ray_org = ray.org, ray_dir = ray.dir; ssef ray_tnear = ray.tnear, ray_tfar = ray.tfar; #if defined(__FIX_RAYS__) const ssef float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,sse3f(-float_range),sse3f(+float_range)); ray_dir = clamp(ray_dir,sse3f(-float_range),sse3f(+float_range)); ray_tnear = max(ray_tnear,FLT_MIN); ray_tfar = min(ray_tfar,float(inf)); #endif const sse3f rdir = rcp_safe(ray_dir); const sse3f org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid0,ray_tnear,ssef(pos_inf)); ray_tfar = select(valid0,ray_tfar ,ssef(neg_inf)); const ssef inf = ssef(pos_inf); /* allocate stack and push root node */ ssef stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; ssef* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef curNode = *sptr_node; if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ ssef curDist = *sptr_near; const sseb active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { intersect1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar); } ray_tfar = ray.tfar; continue; } #endif while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf())) break; const sseb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),4); const Node* __restrict__ const node = curNode.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; curNode = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<4; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; #if defined(__AVX2__) const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); #else const ssef lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const ssef lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const ssef lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const ssef lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const ssef lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const ssef lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; #endif #if defined(__SSE4_1__) const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const ssef lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const sseb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const ssef lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const sseb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); const ssef childDist = select(lhit,lnearP,inf); const NodeRef child = node->children[i]; assert(child != BVH4::emptyNode); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = curNode; *(sptr_near-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* return if stack is empty */ if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ const sseb valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(valid_leaf),4); size_t items; const Primitive* prim = (Primitive*) curNode.leaf(items); PrimitiveIntersector4::intersect(valid_leaf,ray,prim,items,bvh->geometry); ray_tfar = select(valid_leaf,ray.tfar,ray_tfar); } AVX_ZERO_UPPER(); }
void BVH4Intersector4Chunk<PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray) { /* load ray */ const sseb valid0 = *valid_i; const sse3f rdir = rcp_safe(ray.dir); const sse3f org(ray.org), org_rdir = org * rdir; ssef ray_tnear = select(valid0,ray.tnear,ssef(pos_inf)); ssef ray_tfar = select(valid0,ray.tfar ,ssef(neg_inf)); const ssef inf = ssef(pos_inf); Precalculations pre(valid0,ray); /* allocate stack and push root node */ ssef stack_near[stackSize]; NodeRef stack_node[stackSize]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSize; NodeRef* __restrict__ sptr_node = stack_node + 2; ssef* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef curNode = *sptr_node; if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ ssef curDist = *sptr_near; if (unlikely(none(ray_tfar > curDist))) continue; while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf())) break; const sseb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),4); const Node* __restrict__ const node = curNode.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; curNode = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; #if defined(__AVX2__) const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); #else const ssef lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const ssef lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const ssef lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const ssef lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const ssef lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const ssef lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; #endif #if defined(__SSE4_1__) const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const ssef lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const sseb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const ssef lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const sseb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); const ssef childDist = select(lhit,lnearP,inf); const NodeRef child = node->children[i]; assert(child != BVH4::emptyNode); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = curNode; *(sptr_near-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* return if stack is empty */ if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ const sseb valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(valid_leaf),4); size_t items; const Primitive* prim = (Primitive*) curNode.leaf(items); PrimitiveIntersector4::intersect(valid_leaf,pre,ray,prim,items,bvh->geometry); ray_tfar = select(valid_leaf,ray.tfar,ray_tfar); } AVX_ZERO_UPPER(); }
void BVH4Intersector4Hybrid<types,robust,PrimitiveIntersector4>::intersect(bool4* valid_i, BVH4* bvh, Ray4& ray) { /* verify correct input */ bool4 valid0 = *valid_i; #if defined(RTCORE_IGNORE_INVALID_RAYS) valid0 &= ray.valid(); #endif assert(all(valid0,ray.tnear > -FLT_MIN)); assert(!(types & BVH4::FLAG_NODE_MB) || all(valid0,ray.time >= 0.0f & ray.time <= 1.0f)); /* load ray */ Vec3f4 ray_org = ray.org; Vec3f4 ray_dir = ray.dir; float4 ray_tnear = ray.tnear, ray_tfar = ray.tfar; const Vec3f4 rdir = rcp_safe(ray_dir); const Vec3f4 org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid0,ray_tnear,float4(pos_inf)); ray_tfar = select(valid0,ray_tfar ,float4(neg_inf)); const float4 inf = float4(pos_inf); Precalculations pre(valid0,ray); /* compute near/far per ray */ Vec3i4 nearXYZ; nearXYZ.x = select(rdir.x >= 0.0f,int4(0*(int)sizeof(float4)),int4(1*(int)sizeof(float4))); nearXYZ.y = select(rdir.y >= 0.0f,int4(2*(int)sizeof(float4)),int4(3*(int)sizeof(float4))); nearXYZ.z = select(rdir.z >= 0.0f,int4(4*(int)sizeof(float4)),int4(5*(int)sizeof(float4))); /* allocate stack and push root node */ float4 stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; float4* __restrict__ sptr_near = stack_near + 2; while (1) pop: { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ float4 curDist = *sptr_near; const bool4 active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { BVH4Intersector4Single<types,robust,PrimitiveIntersector4>::intersect1(bvh, cur, i, pre, ray, ray_org, ray_dir, rdir, ray_tnear, ray_tfar, nearXYZ); } ray_tfar = min(ray_tfar,ray.tfar); continue; } #endif while (1) { /* process normal nodes */ if (likely((types & 0x1) && cur.isNode())) { const bool4 valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),4); const Node* __restrict__ const node = cur.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; float4 lnearP; const bool4 lhit = intersect_node<robust>(node,i,org,rdir,org_rdir,ray_tnear,ray_tfar,lnearP); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4::emptyNode); const float4 childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } #if SWITCH_DURING_DOWN_TRAVERSAL == 1 // seems to be the best place for testing utilization if (unlikely(popcnt(ray_tfar > curDist) <= SWITCH_THRESHOLD)) { *sptr_node++ = cur; *sptr_near++ = curDist; goto pop; } #endif } /* process motion blur nodes */ else if (likely((types & 0x10) && cur.isNodeMB())) { const bool4 valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),4); const BVH4::NodeMB* __restrict__ const node = cur.nodeMB(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->child(i); if (unlikely(child == BVH4::emptyNode)) break; float4 lnearP; const bool4 lhit = intersect_node(node,i,org,rdir,org_rdir,ray_tnear,ray_tfar,ray.time,lnearP); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4::emptyNode); const float4 childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } #if SWITCH_DURING_DOWN_TRAVERSAL == 1 // seems to be the best place for testing utilization if (unlikely(popcnt(ray_tfar > curDist) <= SWITCH_THRESHOLD)) { *sptr_node++ = cur; *sptr_near++ = curDist; goto pop; } #endif } else break; }
pawn_entry_t *eval_pawn_struct() { int sq, atk_sq, file, rank; bitboard_t t, sq_mask, front, wpawns, bpawns; bool passed, backward, doubled, isolated, connected; pawn_entry_t *p = pnt + (pos->phash & PMASK); if(p->pkey == pos->phash) return (p); p->scoremg = 0; p->scoreeg = 0; p->passers = 0ULL; p->pkey = pos->phash; wpawns = pos->occ[WP]; bpawns = pos->occ[BP]; //storing pawn attacks: p->attacks[W] = ((pos->occ[WP] & ~FMASK_A) << 7); p->attacks[W] |= ((pos->occ[WP] & ~FMASK_H) << 9); p->attacks[B] = ((pos->occ[BP] & ~FMASK_A) >> 9); p->attacks[B] |= ((pos->occ[BP] & ~FMASK_H) >> 7); t = pos->occ[WP]; while(t) { sq = bitscanf(t); bitclear(t, sq); file = File(sq); rank = Rank(sq); //psq_values: p->scoremg += psq_pawn_mg[W][sq]; //collecting the pawn type info: doubled = (true && (wpawns & rook_backward[B][sq])); isolated = (true && (!(wpawns & isolated_mask[file]))); connected = (true && (wpawns & connected_mask[W][sq])); passed = (!doubled && (!(bpawns & passer_mask[W][sq]))); backward = (!isolated && !connected && !passed && (!(wpawns & backward_mask[W][sq]))); //if pawn can advance (not blocked by any other pawn), //but cannot advance safely, it's backward: if(backward) { front = rook_backward[B][sq]; backward = false; while(front) { atk_sq = bitscanf(front); sq_mask = (1ULL << atk_sq); if((sq_mask & (wpawns | bpawns)) || (sq_mask & p->attacks[W])) break; if(sq_mask & p->attacks[B]) { backward = true; break; } bitclear(front, atk_sq); } } //penalties and bonuses: if(backward) { p->scoremg -= backward_penalty_mg[file]; p->scoreeg -= backward_penalty_eg[file]; if(!(bpawns & rook_backward[B][sq])) p->scoremg -= backward_on_semi_op_mg; if(((1ULL << (sq+8)) & p->attacks[B]) && ((1ULL << (sq+8)) & boutposts)) p->scoremg -= weak_square_mg; } if(doubled) { p->scoremg -= doubled_penalty_mg[file]; p->scoreeg -= doubled_penalty_eg[file]; } if(isolated) { p->scoremg -= isolated_penalty_mg[file]; p->scoreeg -= isolated_penalty_eg[file]; if(!(bpawns & rook_backward[B][sq])) p->scoremg -= isolated_on_semi_op_mg; } if(connected) { p->scoremg += pawn_chain_element_mg[file]; p->scoreeg += pawn_chain_element_eg[file]; } if(passed) { p->passers |= (1ULL << sq); p->scoremg += passer_bonus_mg[W][rank]; if(connected) p->scoremg += connected_passer_mg[W][rank]; } else { if(!(bpawns & rook_backward[B][sq])) { if((popcnt(wpawns & backward_mask[W][sq])) >= \ (popcnt(bpawns & backward_mask[B][sq+8]))){ p->scoremg += candidate_mg[W][rank]; p->scoreeg += candidate_eg[W][rank]; } } } } t = pos->occ[BP]; while(t) { sq = bitscanf(t); bitclear(t, sq); file = File(sq); rank = Rank(sq); //psq_values: p->scoremg -= psq_pawn_mg[B][sq]; //collecting the pawn type info: doubled = (true && (bpawns & rook_backward[W][sq])); isolated = (true && (!(bpawns & isolated_mask[file]))); connected = (true && (bpawns & connected_mask[B][sq])); passed = (!doubled && (!(wpawns & passer_mask[B][sq]))); backward = (!isolated && !connected && !passed && (!(bpawns & backward_mask[B][sq]))); if(backward) { front = rook_backward[W][sq]; backward = false; while(front) { atk_sq = bitscanr(front); sq_mask = (1ULL << atk_sq); if((sq_mask & (wpawns | bpawns)) || (sq_mask & p->attacks[B])) break; if(sq_mask & p->attacks[W]) { backward = true; break; } bitclear(front, atk_sq); } } //penalties and bonuses: if(backward) { p->scoremg += backward_penalty_mg[file]; p->scoreeg += backward_penalty_eg[file]; if(!(wpawns & rook_backward[W][sq])) p->scoremg += backward_on_semi_op_mg; if(((1ULL << (sq-8)) & p->attacks[W]) && ((1ULL << (sq-8)) & woutposts)) p->scoremg += weak_square_mg; } if(doubled) { p->scoremg += doubled_penalty_mg[file]; p->scoreeg += doubled_penalty_eg[file]; } if(isolated) { p->scoremg += isolated_penalty_mg[file]; p->scoreeg += isolated_penalty_eg[file]; if(!(wpawns & rook_backward[W][sq])) p->scoremg += isolated_on_semi_op_mg; } if(connected) { p->scoremg -= pawn_chain_element_mg[file]; p->scoreeg -= pawn_chain_element_eg[file]; } if(passed) { p->passers |= (1ULL << sq); p->scoremg -= passer_bonus_mg[B][rank]; if(connected) p->scoremg -= connected_passer_mg[B][rank]; } else { if(!(wpawns & rook_backward[W][sq])) { if((popcnt(bpawns & backward_mask[B][sq])) >= \ (popcnt(wpawns & backward_mask[W][sq-8]))){ p->scoremg -= candidate_mg[B][rank]; p->scoreeg -= candidate_eg[B][rank]; } } } } //saving the pawn shield/storms: pawn_shield_store(p); return (p); }
void BVH4iIntersector16Hybrid<LeafIntersector,ENABLE_COMPRESSED_BVH4I_NODES>::intersect(mic_i* valid_i, BVH4i* bvh, Ray16& ray16) { /* near and node stack */ __aligned(64) mic_f stack_dist[3*BVH4i::maxDepth+1]; __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1]; __aligned(64) NodeRef stack_node_single[3*BVH4i::maxDepth+1]; /* load ray */ const mic_m valid0 = *(mic_i*)valid_i != mic_i(0); const mic3f rdir16 = rcp_safe(ray16.dir); const mic3f org_rdir16 = ray16.org * rdir16; mic_f ray_tnear = select(valid0,ray16.tnear,pos_inf); mic_f ray_tfar = select(valid0,ray16.tfar ,neg_inf); const mic_f inf = mic_f(pos_inf); /* allocate stack and push root node */ stack_node[0] = BVH4i::invalidNode; stack_dist[0] = inf; stack_node[1] = bvh->root; stack_dist[1] = ray_tnear; NodeRef* __restrict__ sptr_node = stack_node + 2; mic_f* __restrict__ sptr_dist = stack_dist + 2; const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr(); while (1) pop: { /* pop next node from stack */ NodeRef curNode = *(sptr_node-1); mic_f curDist = *(sptr_dist-1); sptr_node--; sptr_dist--; const mic_m m_stackDist = ray_tfar > curDist; /* stack emppty ? */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* cull node if behind closest hit point */ if (unlikely(none(m_stackDist))) continue; /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /* switch to single ray mode */ if (unlikely(countbits(m_stackDist) <= BVH4i::hybridSIMDUtilSwitchThreshold)) { float *__restrict__ stack_dist_single = (float*)sptr_dist; store16f(stack_dist_single,inf); /* traverse single ray */ long rayIndex = -1; while((rayIndex = bitscan64(rayIndex,m_stackDist)) != BITSCAN_NO_BIT_SET_64) { stack_node_single[0] = BVH4i::invalidNode; stack_node_single[1] = curNode; size_t sindex = 2; const mic_f org_xyz = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z); const mic_f dir_xyz = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z); const mic_f rdir_xyz = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z); const mic_f org_rdir_xyz = org_xyz * rdir_xyz; const mic_f min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]); mic_f max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]); const unsigned int leaf_mask = BVH4I_LEAF_MASK; while (1) { NodeRef curNode = stack_node_single[sindex-1]; sindex--; traverse_single_intersect<ENABLE_COMPRESSED_BVH4I_NODES>(curNode, sindex, rdir_xyz, org_rdir_xyz, min_dist_xyz, max_dist_xyz, stack_node_single, stack_dist_single, nodes, leaf_mask); /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* intersect one ray against four triangles */ const bool hit = LeafIntersector::intersect(curNode, rayIndex, dir_xyz, org_xyz, min_dist_xyz, max_dist_xyz, ray16, accel, (Scene*)bvh->geometry); if (hit) compactStack(stack_node_single,stack_dist_single,sindex,max_dist_xyz); } } ray_tfar = select(valid0,ray16.tfar ,neg_inf); continue; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// const unsigned int leaf_mask = BVH4I_LEAF_MASK; const mic3f org = ray16.org; const mic3f dir = ray16.dir; while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf(leaf_mask))) break; STAT3(normal.trav_nodes,1,popcnt(ray_tfar > curDist),16); const Node* __restrict__ const node = curNode.node(nodes); prefetch<PFHINT_L1>((mic_f*)node + 0); prefetch<PFHINT_L1>((mic_f*)node + 1); /* pop of next node */ sptr_node--; sptr_dist--; curNode = *sptr_node; curDist = *sptr_dist; #pragma unroll(4) for (unsigned int i=0; i<4; i++) { BVH4i::NodeRef child; mic_f lclipMinX,lclipMinY,lclipMinZ; mic_f lclipMaxX,lclipMaxY,lclipMaxZ; if (!ENABLE_COMPRESSED_BVH4I_NODES) { child = node->lower[i].child; lclipMinX = msub(node->lower[i].x,rdir16.x,org_rdir16.x); lclipMinY = msub(node->lower[i].y,rdir16.y,org_rdir16.y); lclipMinZ = msub(node->lower[i].z,rdir16.z,org_rdir16.z); lclipMaxX = msub(node->upper[i].x,rdir16.x,org_rdir16.x); lclipMaxY = msub(node->upper[i].y,rdir16.y,org_rdir16.y); lclipMaxZ = msub(node->upper[i].z,rdir16.z,org_rdir16.z); } else { BVH4i::QuantizedNode* __restrict__ const compressed_node = (BVH4i::QuantizedNode*)node; child = compressed_node->child(i); const mic_f startXYZ = compressed_node->decompress_startXYZ(); const mic_f diffXYZ = compressed_node->decompress_diffXYZ(); const mic_f clower = compressed_node->decompress_lowerXYZ(startXYZ,diffXYZ); const mic_f cupper = compressed_node->decompress_upperXYZ(startXYZ,diffXYZ); lclipMinX = msub(mic_f(clower[4*i+0]),rdir16.x,org_rdir16.x); lclipMinY = msub(mic_f(clower[4*i+1]),rdir16.y,org_rdir16.y); lclipMinZ = msub(mic_f(clower[4*i+2]),rdir16.z,org_rdir16.z); lclipMaxX = msub(mic_f(cupper[4*i+0]),rdir16.x,org_rdir16.x); lclipMaxY = msub(mic_f(cupper[4*i+1]),rdir16.y,org_rdir16.y); lclipMaxZ = msub(mic_f(cupper[4*i+2]),rdir16.z,org_rdir16.z); } if (unlikely(i >=2 && child == BVH4i::invalidNode)) break; const mic_f lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const mic_f lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const mic_m lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); const mic_f childDist = select(lhit,lnearP,inf); const mic_m m_child_dist = childDist < curDist; /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { sptr_node++; sptr_dist++; /* push cur node onto stack and continue with hit child */ if (any(m_child_dist)) { *(sptr_node-1) = curNode; *(sptr_dist-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack*/ else { *(sptr_node-1) = child; *(sptr_dist-1) = childDist; const char* __restrict__ const pnode = (char*)child.node(nodes); prefetch<PFHINT_L2>(pnode + 0); prefetch<PFHINT_L2>(pnode + 64); } assert(sptr_node - stack_node < BVH4i::maxDepth); } } #if SWITCH_ON_DOWN_TRAVERSAL == 1 const mic_m curUtil = ray_tfar > curDist; if (unlikely(countbits(curUtil) <= BVH4i::hybridSIMDUtilSwitchThreshold)) { *sptr_node++ = curNode; *sptr_dist++ = curDist; goto pop; } #endif } /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* intersect leaf */ const mic_m m_valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(m_valid_leaf),16); LeafIntersector::intersect16(curNode,m_valid_leaf,dir,org,ray16,accel,(Scene*)bvh->geometry); ray_tfar = select(m_valid_leaf,ray16.tfar,ray_tfar); }
// Generate a return statement. // void GenerateReturn(SYM *sym, Statement *stmt) { AMODE *ap; int nn; int lab1; int cnt; // Generate code to evaluate the return expression. if( stmt != NULL && stmt->exp != NULL ) { initstack(); ap = GenerateExpression(stmt->exp,F_ALL & ~F_BREG,8); // Force return value into register 1 if( ap->preg != 1 ) { if (ap->mode == am_immed) GenerateDiadic(op_ldi, 0, makereg(1),ap); else if (ap->mode == am_reg) GenerateDiadic(op_mov, 0, makereg(1),ap); else GenerateDiadic(op_lw,0,makereg(1),ap); } } // Generate the return code only once. Branch to the return code for all returns. if( retlab == -1 ) { retlab = nextlabel++; GenerateLabel(retlab); // Unlock any semaphores that may have been set for (nn = lastsph - 1; nn >= 0; nn--) GenerateDiadic(op_sb,0,makereg(0),make_string(semaphores[nn])); if (sym->IsNocall) // nothing to do for nocall convention return; // Restore registers used as register variables. if( bsave_mask != 0 ) { cnt = (bitsset(bsave_mask)-1)*8; for (nn = 15; nn >=1 ; nn--) { if (bsave_mask & (1 << nn)) { GenerateDiadic(op_ss|op_lws,0,makebreg(nn),make_indexed(cnt,SP)); cnt -= 8; } } GenerateTriadic(op_addui,0,makereg(SP),makereg(SP),make_immed(popcnt(bsave_mask)*8)); } if( save_mask != 0 ) { cnt = (bitsset(save_mask)-1)*8; for (nn = 31; nn >=1 ; nn--) { if (save_mask & (1 << nn)) { GenerateDiadic(op_ss|op_lw,0,makereg(nn),make_indexed(cnt,SP)); cnt -= 8; } } GenerateTriadic(op_addui,0,makereg(SP),makereg(SP),make_immed(popcnt(save_mask)*8)); } // Unlink the stack // For a leaf routine the link register and exception link register doesn't need to be saved/restored. if (lc_auto || sym->NumParms > 0) { GenerateDiadic(op_mov,0,makereg(SP),makereg(regBP)); GenerateDiadic(op_ss|op_lw,0,makereg(regBP),make_indirect(regSP)); } if (!sym->IsLeaf) { if (exceptions) GenerateDiadic(op_ss|op_lws,0,makebreg(CLR),make_indexed(8,regSP)); GenerateDiadic(op_ss|op_lws,0,makebreg(LR),make_indexed(16,regSP)); // if (sym->UsesPredicate) } GenerateDiadic(op_ss|op_lws,0,make_string("pregs"),make_indexed(24,regSP)); //if (isOscall) { // GenerateDiadic(op_move,0,makereg(0),make_string("_TCBregsave")); // gen_regrestore(); //} // Generate the return instruction. For the Pascal calling convention pop the parameters // from the stack. if (sym->IsInterrupt) { //GenerateTriadic(op_addui,0,makereg(30),makereg(30),make_immed(24)); //GenerateDiadic(op_lm,0,make_indirect(30),make_mask(0x9FFFFFFE)); //GenerateTriadic(op_addui,0,makereg(30),makereg(30),make_immed(popcnt(0x9FFFFFFE)*8)); GenerateMonadic(op_rti,0,(AMODE *)NULL); return; } if (sym->IsPascal) { GenerateTriadic(op_addui,0,makereg(regSP),makereg(regSP),make_immed(32+sym->NumParms * 8)); GenerateMonadic(op_rts,0,(AMODE *)NULL); } else { GenerateTriadic(op_addui,0,makereg(regSP),makereg(regSP),make_immed(32)); GenerateMonadic(op_rts,0,(AMODE*)NULL); } } // Just branch to the already generated stack cleanup code. else { GenerateMonadic(op_bra,0,make_clabel(retlab)); } }
// Generate function epilog code. // void GenerateEpilog(SYM *sym) { AMODE *ap; int nn; int lab1; int cnt; // Generate the return code only once. Branch to the return code for all returns. GenerateLabel(retlab); // Unlock any semaphores that may have been set for (nn = lastsph - 1; nn >= 0; nn--) GenerateDiadic(op_sb,0,makereg(0),make_string(semaphores[nn])); if (sym->IsNocall) // nothing to do for nocall convention return; // Restore registers used as register variables. if( bsave_mask != 0 ) { cnt = (bitsset(bsave_mask)-1)*8; for (nn = 15; nn >=1 ; nn--) { if (bsave_mask & (1 << nn)) { GenerateDiadic(op_lws,0,makebreg(nn),make_indexed(cnt,regSP)); cnt -= 8; } } GenerateTriadic(op_addui,0,makereg(SP),makereg(regSP),make_immed(popcnt(bsave_mask)*8)); } if( save_mask != 0 ) { cnt = (bitsset(save_mask)-1)*8; for (nn = 31; nn >=1 ; nn--) { if (save_mask & (1 << nn)) { GenerateDiadic(op_lw,0,makereg(nn),make_indexed(cnt,regSP)); cnt -= 8; } } GenerateTriadic(op_addui,0,makereg(regSP),makereg(regSP),make_immed(popcnt(save_mask)*8)); } // Unlink the stack // For a leaf routine the link register and exception link register doesn't need to be saved/restored. if (lc_auto || sym->NumParms > 0) { GenerateDiadic(op_mov,0,makereg(regSP),makereg(regBP)); GenerateDiadic(op_lw,0,makereg(regBP),make_indirect(regSP)); } if (!sym->IsLeaf) { if (exceptions) GenerateDiadic(op_lws,0,makebreg(regXLR),make_indexed(8,regSP)); // 11=CLR GenerateDiadic(op_lws,0,makebreg(regLR),make_indexed(16,regSP)); // 1 = LR // if (sym->UsesPredicate) } GenerateDiadic(op_lws,0,make_string("pregs"),make_indexed(24,regSP)); GenerateDiadic(op_lw,0,makereg(regCLP),make_indexed(32,regSP)); if (sym->epilog) { if (optimize) opt1(sym->epilog); GenerateStatement(sym->epilog); return; } //if (isOscall) { // GenerateDiadic(op_move,0,makereg(0),make_string("_TCBregsave")); // gen_regrestore(); //} // Generate the return instruction. For the Pascal calling convention pop the parameters // from the stack. if (sym->IsInterrupt) { //GenerateTriadic(op_addui,0,makereg(30),makereg(30),make_immed(24)); //GenerateDiadic(op_lm,0,make_indirect(30),make_mask(0x9FFFFFFE)); //GenerateTriadic(op_addui,0,makereg(30),makereg(30),make_immed(popcnt(0x9FFFFFFE)*8)); GenerateMonadic(op_rti,0,(AMODE *)NULL); return; } if (sym->IsPascal) { GenerateTriadic(op_addui,0,makereg(regSP),makereg(regSP),make_immed(GetReturnBlockSize()+sym->NumParms * 8)); GenerateMonadic(op_rts,0,(AMODE *)NULL); } else { GenerateTriadic(op_addui,0,makereg(regSP),makereg(regSP),make_immed(GetReturnBlockSize())); GenerateMonadic(op_rts,0,(AMODE*)NULL); } }
void BVH4Intersector4Chunk<TriangleIntersector4>::intersect(const BVH4Intersector4Chunk* This, Ray4& ray, const __m128 valid_i) { sseb valid = valid_i; NodeRef invalid = (NodeRef)1; const BVH4* bvh = This->bvh; STAT3(normal.travs,1,popcnt(valid),4); /* load ray into registers */ ssef ray_near = select(valid,ray.tnear,pos_inf); ssef ray_far = select(valid,ray.tfar ,neg_inf); sse3f rdir = rcp_safe(ray.dir); ray.tfar = ray_far; /* allocate stack and push root node */ NodeRef stack_node[3*BVH4::maxDepth+1]; ssef stack_near[3*BVH4::maxDepth+1]; stack_node[0] = invalid; stack_near[0] = ssef(inf); stack_node[1] = bvh->root; stack_near[1] = ray_near; NodeRef* sptr_node = stack_node+2; ssef * sptr_near = stack_near+2; while (1) { /* pop next node from stack */ sptr_node--; sptr_near--; ssef curDist = *sptr_near; NodeRef curNode = *sptr_node; if (unlikely(curNode == invalid)) break; /* cull node if behind closest hit point */ const sseb m_dist = curDist < ray_far; if (unlikely(none(m_dist))) continue; while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf())) break; STAT3(normal.trav_nodes,1,popcnt(valid),4); const Node* const node = curNode.node(bvh->nodePtr()); //NodeRef(curNode).node(nodes); //prefetch<PFHINT_L1>((ssef*)node + 1); // depth first order prefetch /* pop of next node */ sptr_node--; sptr_near--; curNode = *sptr_node; curDist = *sptr_near; for (unsigned i=0;i<4;i++) { const ssef dminx = (ssef(node->lower_x[i]) - ray.org.x) * rdir.x; const ssef dmaxx = (ssef(node->upper_x[i]) - ray.org.x) * rdir.x; const ssef dminy = (ssef(node->lower_y[i]) - ray.org.y) * rdir.y; const ssef dmaxy = (ssef(node->upper_y[i]) - ray.org.y) * rdir.y; const ssef dminz = (ssef(node->lower_z[i]) - ray.org.z) * rdir.z; const ssef dmaxz = (ssef(node->upper_z[i]) - ray.org.z) * rdir.z; const ssef dlowerx = min(dminx,dmaxx); const ssef dupperx = max(dminx,dmaxx); const ssef dlowery = min(dminy,dmaxy); const ssef duppery = max(dminy,dmaxy); const ssef dlowerz = min(dminz,dmaxz); const ssef dupperz = max(dminz,dmaxz); const ssef near = max(dlowerx,dlowery,dlowerz,ray_near); const ssef far = min(dupperx,duppery,dupperz,ray_far ); const sseb mhit = near <= far; const ssef childDist = select(mhit,near,inf); const sseb closer = childDist < curDist; /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(mhit))) { const NodeRef child = node->child(i); //if (child != invalid) { sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(closer)) { *(sptr_node-1) = curNode; *(sptr_near-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack*/ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } } /* return if stack is empty */ if (unlikely(curNode == invalid)) break; /* decode leaf node */ size_t num; STAT3(normal.trav_leaves,1,popcnt(valid),4); Triangle* tri = (Triangle*) curNode.leaf(bvh->triPtr(),num); /* intersect triangles */ for (size_t i=0; i<num; i++) TriangleIntersector4::intersect(valid,ray,tri[i],bvh->vertices); ray_far = ray.tfar; } }
void BVH8Intersector8Chunk<PrimitiveIntersector8>::intersect(avxb* valid_i, BVH8* bvh, Ray8& ray) { #if defined(__AVX__) /* load ray */ const avxb valid0 = *valid_i; const avx3f rdir = rcp_safe(ray.dir); const avx3f org_rdir = ray.org * rdir; avxf ray_tnear = select(valid0,ray.tnear,pos_inf); avxf ray_tfar = select(valid0,ray.tfar ,neg_inf); const avxf inf = avxf(pos_inf); Precalculations pre(valid0,ray); /* allocate stack and push root node */ avxf stack_near[3*BVH8::maxDepth+1]; NodeRef stack_node[3*BVH8::maxDepth+1]; stack_node[0] = BVH8::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* __restrict__ sptr_node = stack_node + 2; avxf* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH8::invalidNode)) break; /* cull node if behind closest hit point */ avxf curDist = *sptr_near; if (unlikely(none(ray_tfar > curDist))) continue; while (1) { /* test if this is a leaf node */ if (unlikely(cur.isLeaf())) break; const avxb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = (BVH8::Node*)cur.node(); /* pop of next node */ sptr_node--; sptr_near--; cur = *sptr_node; // FIXME: this trick creates issues with stack depth curDist = *sptr_near; for (unsigned i=0; i<BVH8::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH8::emptyNode)) break; #if defined(__AVX2__) const avxf lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const avxf lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const avxf lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const avxf lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const avxf lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const avxf lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const avxf lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const avxf lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const avxb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const avxf lclipMinX = node->lower_x[i] * rdir.x - org_rdir.x; const avxf lclipMinY = node->lower_y[i] * rdir.y - org_rdir.y; const avxf lclipMinZ = node->lower_z[i] * rdir.z - org_rdir.z; const avxf lclipMaxX = node->upper_x[i] * rdir.x - org_rdir.x; const avxf lclipMaxY = node->upper_y[i] * rdir.y - org_rdir.y; const avxf lclipMaxZ = node->upper_z[i] * rdir.z - org_rdir.z; const avxf lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const avxf lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const avxb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { const avxf childDist = select(lhit,lnearP,inf); const NodeRef child = node->children[i]; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *sptr_node = cur; *sptr_near = curDist; sptr_node++; sptr_near++; curDist = childDist; cur = child; } /* push hit child onto stack*/ else { *sptr_node = child; *sptr_near = childDist; sptr_node++; sptr_near++; } assert(sptr_node - stack_node < BVH8::maxDepth); } } } /* return if stack is empty */ if (unlikely(cur == BVH8::invalidNode)) break; /* intersect leaf */ assert(cur != BVH8::emptyNode); const avxb valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(valid_leaf),8); size_t items; const Triangle* tri = (Triangle*) cur.leaf(items); PrimitiveIntersector8::intersect(valid_leaf,pre,ray,tri,items,bvh->geometry); ray_tfar = select(valid_leaf,ray.tfar,ray_tfar); } AVX_ZERO_UPPER(); #endif }
void BVH8Intersector8Hybrid<PrimitiveIntersector8>::occluded(bool8* valid_i, BVH8* bvh, Ray8& ray) { /* load ray */ const bool8 valid = *valid_i; bool8 terminated = !valid; Vec3f8 ray_org = ray.org, ray_dir = ray.dir; float8 ray_tnear = ray.tnear, ray_tfar = ray.tfar; const Vec3f8 rdir = rcp_safe(ray_dir); const Vec3f8 org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid,ray_tnear,float8(pos_inf)); ray_tfar = select(valid,ray_tfar ,float8(neg_inf)); const float8 inf = float8(pos_inf); Precalculations pre(valid,ray); /* compute near/far per ray */ Vec3i8 nearXYZ; nearXYZ.x = select(rdir.x >= 0.0f,int8(0*(int)sizeof(float8)),int8(1*(int)sizeof(float8))); nearXYZ.y = select(rdir.y >= 0.0f,int8(2*(int)sizeof(float8)),int8(3*(int)sizeof(float8))); nearXYZ.z = select(rdir.z >= 0.0f,int8(4*(int)sizeof(float8)),int8(5*(int)sizeof(float8))); /* allocate stack and push root node */ float8 stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH8::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; float8* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH8::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ float8 curDist = *sptr_near; const bool8 active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { if (occluded1(bvh,cur,i,pre,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar,nearXYZ)) terminated[i] = -1; } if (all(terminated)) break; ray_tfar = select(terminated,float8(neg_inf),ray_tfar); continue; } #endif while (1) { /* test if this is a leaf node */ if (unlikely(cur.isLeaf())) break; const bool8 valid_node = ray_tfar > curDist; STAT3(shadow.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = (Node*)cur.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; for (unsigned i=0; i<BVH8::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH8::emptyNode)) break; #if defined(__AVX2__) const float8 lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const float8 lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const float8 lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const float8 lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const float8 lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const float8 lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const float8 lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const float8 lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const bool8 lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const float8 lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const float8 lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const float8 lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const float8 lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const float8 lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const float8 lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; const float8 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const float8 lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const bool8 lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH8::emptyNode); const float8 childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* return if stack is empty */ if (unlikely(cur == BVH8::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ assert(cur != BVH8::emptyNode); const bool8 valid_leaf = ray_tfar > curDist; STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8); size_t items; const Triangle* prim = (Triangle*) cur.leaf(items); terminated |= PrimitiveIntersector8::occluded(!terminated,pre,ray,prim,items,bvh->scene); if (all(terminated)) break; ray_tfar = select(terminated,float8(neg_inf),ray_tfar); } store8i(valid & terminated,&ray.geomID,0); AVX_ZERO_UPPER(); }
void BVH4mbIntersector16Hybrid<LeafIntersector>::intersect(int16* valid_i, BVH4mb* bvh, Ray16& ray16) { /* near and node stack */ __aligned(64) float16 stack_dist[3*BVH4i::maxDepth+1]; __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1]; __aligned(64) NodeRef stack_node_single[3*BVH4i::maxDepth+1]; /* load ray */ const bool16 valid0 = *(int16*)valid_i != int16(0); const Vec3f16 rdir16 = rcp_safe(ray16.dir); const Vec3f16 org_rdir16 = ray16.org * rdir16; float16 ray_tnear = select(valid0,ray16.tnear,pos_inf); float16 ray_tfar = select(valid0,ray16.tfar ,neg_inf); const float16 inf = float16(pos_inf); /* allocate stack and push root node */ stack_node[0] = BVH4i::invalidNode; stack_dist[0] = inf; stack_node[1] = bvh->root; stack_dist[1] = ray_tnear; NodeRef* __restrict__ sptr_node = stack_node + 2; float16* __restrict__ sptr_dist = stack_dist + 2; const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const BVH4mb::Triangle01 * __restrict__ accel = (BVH4mb::Triangle01 *)bvh->triPtr(); while (1) pop: { /* pop next node from stack */ NodeRef curNode = *(sptr_node-1); float16 curDist = *(sptr_dist-1); sptr_node--; sptr_dist--; const bool16 m_stackDist = ray_tfar > curDist; /* stack emppty ? */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* cull node if behind closest hit point */ if (unlikely(none(m_stackDist))) continue; /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /* switch to single ray mode */ if (unlikely(countbits(m_stackDist) <= BVH4i::hybridSIMDUtilSwitchThreshold)) { float *__restrict__ stack_dist_single = (float*)sptr_dist; store16f(stack_dist_single,inf); /* traverse single ray */ long rayIndex = -1; while((rayIndex = bitscan64(rayIndex,m_stackDist)) != BITSCAN_NO_BIT_SET_64) { stack_node_single[0] = BVH4i::invalidNode; stack_node_single[1] = curNode; size_t sindex = 2; const float16 org_xyz = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z); const float16 dir_xyz = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z); const float16 rdir_xyz = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z); const float16 org_rdir_xyz = org_xyz * rdir_xyz; const float16 min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]); float16 max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]); const float16 time = broadcast1to16f(&ray16.time[rayIndex]); const unsigned int leaf_mask = BVH4I_LEAF_MASK; while (1) { NodeRef curNode = stack_node_single[sindex-1]; sindex--; traverse_single_intersect(curNode, sindex, rdir_xyz, org_rdir_xyz, min_dist_xyz, max_dist_xyz, time, stack_node_single, stack_dist_single, nodes, leaf_mask); /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* intersect one ray against four triangles */ const bool hit = LeafIntersector::intersect(curNode, rayIndex, dir_xyz, org_xyz, min_dist_xyz, max_dist_xyz, ray16, accel, (Scene*)bvh->geometry); if (hit) compactStack(stack_node_single,stack_dist_single,sindex,max_dist_xyz); } } ray_tfar = select(valid0,ray16.tfar ,neg_inf); continue; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// const unsigned int leaf_mask = BVH4I_LEAF_MASK; const float16 time = ray16.time; const float16 one_time = (float16::one() - time); while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf(leaf_mask))) break; STAT3(normal.trav_nodes,1,popcnt(ray_tfar > curDist),16); const Node* __restrict__ const node = curNode.node(nodes); const BVH4mb::Node* __restrict__ const nodeMB = (BVH4mb::Node*)node; /* pop of next node */ sptr_node--; sptr_dist--; curNode = *sptr_node; curDist = *sptr_dist; prefetch<PFHINT_L1>((char*)node + 0*64); prefetch<PFHINT_L1>((char*)node + 1*64); prefetch<PFHINT_L1>((char*)node + 2*64); prefetch<PFHINT_L1>((char*)node + 3*64); #pragma unroll(4) for (unsigned int i=0; i<4; i++) { const NodeRef child = node->lower[i].child; const float16 lower_x = one_time * nodeMB->lower[i].x + time * nodeMB->lower_t1[i].x; const float16 lower_y = one_time * nodeMB->lower[i].y + time * nodeMB->lower_t1[i].y; const float16 lower_z = one_time * nodeMB->lower[i].z + time * nodeMB->lower_t1[i].z; const float16 upper_x = one_time * nodeMB->upper[i].x + time * nodeMB->upper_t1[i].x; const float16 upper_y = one_time * nodeMB->upper[i].y + time * nodeMB->upper_t1[i].y; const float16 upper_z = one_time * nodeMB->upper[i].z + time * nodeMB->upper_t1[i].z; if (unlikely(i >=2 && child == BVH4i::invalidNode)) break; const float16 lclipMinX = msub(lower_x,rdir16.x,org_rdir16.x); const float16 lclipMinY = msub(lower_y,rdir16.y,org_rdir16.y); const float16 lclipMinZ = msub(lower_z,rdir16.z,org_rdir16.z); const float16 lclipMaxX = msub(upper_x,rdir16.x,org_rdir16.x); const float16 lclipMaxY = msub(upper_y,rdir16.y,org_rdir16.y); const float16 lclipMaxZ = msub(upper_z,rdir16.z,org_rdir16.z); const float16 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const float16 lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const bool16 lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); const float16 childDist = select(lhit,lnearP,inf); const bool16 m_child_dist = childDist < curDist; /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { sptr_node++; sptr_dist++; /* push cur node onto stack and continue with hit child */ if (any(m_child_dist)) { *(sptr_node-1) = curNode; *(sptr_dist-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack*/ else { *(sptr_node-1) = child; *(sptr_dist-1) = childDist; } assert(sptr_node - stack_node < BVH4i::maxDepth); } } #if SWITCH_ON_DOWN_TRAVERSAL == 1 const bool16 curUtil = ray_tfar > curDist; if (unlikely(countbits(curUtil) <= BVH4i::hybridSIMDUtilSwitchThreshold)) { *sptr_node++ = curNode; *sptr_dist++ = curDist; goto pop; } #endif } /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* intersect leaf */ const bool16 m_valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(m_valid_leaf),16); LeafIntersector::intersect16(curNode, m_valid_leaf, ray16.dir, ray16.org, ray16, accel, (Scene*)bvh->geometry); ray_tfar = select(m_valid_leaf,ray16.tfar,ray_tfar); }
void BVH4iIntersector16Chunk<LeafIntersector,ENABLE_COMPRESSED_BVH4I_NODES>::occluded(mic_i* valid_i, BVH4i* bvh, Ray16& ray) { /* allocate stack */ __aligned(64) mic_f stack_dist[3*BVH4i::maxDepth+1]; __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1]; /* load ray */ const mic_m valid = *(mic_i*)valid_i != mic_i(0); mic_m m_terminated = !valid; const mic3f rdir = rcp_safe(ray.dir); const mic3f org_rdir = ray.org * rdir; mic_f ray_tnear = select(valid,ray.tnear,pos_inf); mic_f ray_tfar = select(valid,ray.tfar ,neg_inf); const mic_f inf = mic_f(pos_inf); /* push root node */ stack_node[0] = BVH4i::invalidNode; stack_dist[0] = inf; stack_node[1] = bvh->root; stack_dist[1] = ray_tnear; NodeRef* __restrict__ sptr_node = stack_node + 2; mic_f* __restrict__ sptr_dist = stack_dist + 2; const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr(); const mic3f org = ray.org; const mic3f dir = ray.dir; while (1) { const mic_m m_active = !m_terminated; /* pop next node from stack */ NodeRef curNode = *(sptr_node-1); mic_f curDist = *(sptr_dist-1); sptr_node--; sptr_dist--; const mic_m m_stackDist = gt(m_active,ray_tfar,curDist); /* stack emppty ? */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* cull node if behind closest hit point */ if (unlikely(none(m_stackDist))) { continue; } const unsigned int leaf_mask = BVH4I_LEAF_MASK; traverse_chunk_occluded<ENABLE_COMPRESSED_BVH4I_NODES>(curNode, curDist, rdir, org_rdir, ray_tnear, ray_tfar, m_active, sptr_node, sptr_dist, nodes, leaf_mask); /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* intersect leaf */ mic_m m_valid_leaf = gt(m_active,ray_tfar,curDist); STAT3(shadow.trav_leaves,1,popcnt(m_valid_leaf),16); LeafIntersector::occluded16(curNode,m_valid_leaf,dir,org,ray,m_terminated,accel,(Scene*)bvh->geometry); if (unlikely(all(m_terminated))) break; ray_tfar = select(m_terminated,neg_inf,ray_tfar); } store16i(valid & m_terminated,&ray.geomID,0); }