/*! Occlusion query for a packet of 8 rays that walks the BVH4 one ray at a
 *  time by calling occluded1() for every active lane.
 *
 *  \param valid_i  mask of the active rays of the packet
 *  \param bvh      hierarchy to traverse; traversal starts at bvh->root
 *  \param ray      ray packet; store8i() is invoked on ray.geomID with the
 *                  value 0 and the mask of active rays for which occluded1()
 *                  returned true (presumably a masked store that marks those
 *                  rays as occluded - confirm against store8i)
 */
void BVH4Intersector8Single<types,robust, PrimitiveIntersector8>::occluded(avxb* valid_i, BVH4* bvh, Ray8& ray)
{
  /* load ray */
  const avxb valid = *valid_i;
  avxb terminated = !valid;
  avx3f ray_org = ray.org, ray_dir = ray.dir;
  avxf ray_tnear = ray.tnear, ray_tfar = ray.tfar;
  const avx3f rdir = rcp_safe(ray_dir);

  /* inactive rays get the empty interval [+inf,-inf] */
  ray_tnear = select(valid,ray_tnear,avxf(pos_inf));
  ray_tfar  = select(valid,ray_tfar ,avxf(neg_inf));
  Precalculations pre(valid,ray);

  /* compute near/far per ray: per axis one of two byte offsets (multiples of
     sizeof(ssef)) is chosen from the sign of the reciprocal direction */
  avx3i nearXYZ;
  nearXYZ.x = select(rdir.x >= 0.0f,avxi(0*(int)sizeof(ssef)),avxi(1*(int)sizeof(ssef)));
  nearXYZ.y = select(rdir.y >= 0.0f,avxi(2*(int)sizeof(ssef)),avxi(3*(int)sizeof(ssef)));
  nearXYZ.z = select(rdir.z >= 0.0f,avxi(4*(int)sizeof(ssef)),avxi(5*(int)sizeof(ssef)));

  /* we have no packet implementation for OBB nodes yet */
  size_t bits = movemask(valid);

  /* visit every set bit; the mask is tested before scanning so that __bsf is
     never evaluated on an empty mask */
  while (bits != 0)
  {
    const size_t i = __bsf(bits);
    bits = __btc(bits,i);
    if (occluded1(bvh,bvh->root,i,pre,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar,nearXYZ))
      terminated[i] = -1;
  }

  /* only rays that were active on entry are reported */
  store8i(valid & terminated,&ray.geomID,0);
  AVX_ZERO_UPPER();
}
/*! Occlusion query for a packet of 8 rays against a BVH2. All rays descend
 *  the tree together; a subtree is entered when at least one lane hits its
 *  box.
 *
 *  \param This     intersector instance providing the BVH (This->bvh)
 *  \param ray      ray packet to test
 *  \param valid_i  mask of the active rays
 *  \return         the "terminated" mask. It is initialised to !valid and only
 *                  ever OR-ed with results of TriangleIntersector::occluded(),
 *                  so lanes that were inactive on entry are set in the result.
 *                  NOTE(review): other occlusion kernels mask their result
 *                  with the valid mask - confirm callers ignore inactive lanes.
 */
__m256 BVH2Intersector8Chunk<TriangleIntersector>::occluded(const BVH2Intersector8Chunk* This, Ray8& ray, const __m256 valid_i)
{
  avxb valid = valid_i;
  avxb terminated = !valid;
  const BVH2* bvh = This->bvh;
  STAT3(shadow.travs,1,popcnt(valid),8);

  NodeRef stack[1+BVH2::maxDepth];  //!< stack of nodes that still need to get traversed
  NodeRef* stackPtr = stack;        //!< current stack pointer
  NodeRef cur = bvh->root;          //!< in cur we track the ID of the current node

  /* let inactive rays miss all boxes */
  const avx3f rdir = rcp_safe(ray.dir);
  avxf rayFar = select(terminated,avxf(neg_inf),ray.tfar);

  while (true)
  {
    /*! downtraversal loop */
    while (likely(cur.isNode()))
    {
      /* this is a shadow query, so account the node visit to the shadow
         statistics like every other counter in this function */
      STAT3(shadow.trav_nodes,1,popcnt(valid),8);

      /* intersect packet with box of both children */
      const Node* node = cur.node();
      const size_t hit0 = intersectBox(ray.org,rdir,ray.tnear,rayFar,node,0);
      const size_t hit1 = intersectBox(ray.org,rdir,ray.tnear,rayFar,node,1);

      /*! if two children are hit push both onto stack */
      if (likely(hit0 != 0 && hit1 != 0)) {
        *stackPtr = node->child(0); stackPtr++; cur = node->child(1);
      }

      /*! if one child hit, continue with that child */
      else {
        if      (likely(hit0 != 0)) cur = node->child(0);
        else if (likely(hit1 != 0)) cur = node->child(1);
        else goto pop_node;
      }
    }

    /*! leaf node, intersect all triangles */
    {
      STAT3(shadow.trav_leaves,1,popcnt(valid),8);
      size_t num; Triangle* tri = (Triangle*) cur.leaf(NULL,num);
      for (size_t i=0; i<num; i++) {
        terminated |= TriangleIntersector::occluded(valid,ray,tri[i],bvh->vertices);
        /* early out as soon as no lane is left to test */
        if (all(terminated)) return terminated;
      }

      /* let terminated rays miss all boxes */
      rayFar = select(terminated,avxf(neg_inf),rayFar);
    }

    /*! pop next node from stack */
pop_node:
    if (unlikely(stackPtr == stack)) break;
    cur = *(--stackPtr);
  }
  return terminated;
}
/*! Ray packet / box intersection for one child of a BVH2 node.
 *
 *  For every axis the two bounding planes are read from
 *  node->lower_upper_{x,y,z} at index i and i+2, converted into distances
 *  along the rays and combined into an entry and an exit distance that are
 *  clipped against tnear and tfar.
 *
 *  \param org    ray origins
 *  \param rdir   value multiplied with the plane offsets (callers in this file
 *                pass the reciprocal ray direction)
 *  \param tnear  lower bound applied to the entry distance
 *  \param tfar   upper bound applied to the exit distance
 *  \param node   node whose child box is tested
 *  \param i      index used to address the child's bounds
 *  \return       movemask of the lanes with entry distance <= exit distance
 */
__forceinline size_t intersectBox(const avx3f& org, const avx3f& rdir, const avxf& tnear, const avxf& tfar, const BVH2::Node* node, const int i)
{
  /* distances to both planes of the x slab */
  const avxf tx0 = (avxf(node->lower_upper_x[i+0]) - org.x) * rdir.x;
  const avxf tx1 = (avxf(node->lower_upper_x[i+2]) - org.x) * rdir.x;

  /* distances to both planes of the y slab */
  const avxf ty0 = (avxf(node->lower_upper_y[i+0]) - org.y) * rdir.y;
  const avxf ty1 = (avxf(node->lower_upper_y[i+2]) - org.y) * rdir.y;

  /* distances to both planes of the z slab */
  const avxf tz0 = (avxf(node->lower_upper_z[i+0]) - org.z) * rdir.z;
  const avxf tz1 = (avxf(node->lower_upper_z[i+2]) - org.z) * rdir.z;

  /* latest entry and earliest exit over all slabs, clipped to the ray interval */
  const avxf tEntry = max(min(tx0,tx1),min(ty0,ty1),min(tz0,tz1),tnear);
  const avxf tExit  = min(max(tx0,tx1),max(ty0,ty1),max(tz0,tz1),tfar );

  return movemask(tEntry <= tExit);
}
void BVH8Intersector8Chunk<PrimitiveIntersector8>::intersect(avxb* valid_i, BVH8* bvh, Ray8& ray) { #if defined(__AVX__) /* load ray */ const avxb valid0 = *valid_i; const avx3f rdir = rcp_safe(ray.dir); const avx3f org_rdir = ray.org * rdir; avxf ray_tnear = select(valid0,ray.tnear,pos_inf); avxf ray_tfar = select(valid0,ray.tfar ,neg_inf); const avxf inf = avxf(pos_inf); Precalculations pre(valid0,ray); /* allocate stack and push root node */ avxf stack_near[3*BVH8::maxDepth+1]; NodeRef stack_node[3*BVH8::maxDepth+1]; stack_node[0] = BVH8::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* __restrict__ sptr_node = stack_node + 2; avxf* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH8::invalidNode)) break; /* cull node if behind closest hit point */ avxf curDist = *sptr_near; if (unlikely(none(ray_tfar > curDist))) continue; while (1) { /* test if this is a leaf node */ if (unlikely(cur.isLeaf())) break; const avxb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = (BVH8::Node*)cur.node(); /* pop of next node */ sptr_node--; sptr_near--; cur = *sptr_node; // FIXME: this trick creates issues with stack depth curDist = *sptr_near; for (unsigned i=0; i<BVH8::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH8::emptyNode)) break; #if defined(__AVX2__) const avxf lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const avxf lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const avxf lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const avxf lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const avxf lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const avxf lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const avxf lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), 
mini(lclipMinZ, lclipMaxZ)); const avxf lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const avxb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const avxf lclipMinX = node->lower_x[i] * rdir.x - org_rdir.x; const avxf lclipMinY = node->lower_y[i] * rdir.y - org_rdir.y; const avxf lclipMinZ = node->lower_z[i] * rdir.z - org_rdir.z; const avxf lclipMaxX = node->upper_x[i] * rdir.x - org_rdir.x; const avxf lclipMaxY = node->upper_y[i] * rdir.y - org_rdir.y; const avxf lclipMaxZ = node->upper_z[i] * rdir.z - org_rdir.z; const avxf lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const avxf lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const avxb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { const avxf childDist = select(lhit,lnearP,inf); const NodeRef child = node->children[i]; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *sptr_node = cur; *sptr_near = curDist; sptr_node++; sptr_near++; curDist = childDist; cur = child; } /* push hit child onto stack*/ else { *sptr_node = child; *sptr_near = childDist; sptr_node++; sptr_near++; } assert(sptr_node - stack_node < BVH8::maxDepth); } } } /* return if stack is empty */ if (unlikely(cur == BVH8::invalidNode)) break; /* intersect leaf */ assert(cur != BVH8::emptyNode); const avxb valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(valid_leaf),8); size_t items; const Triangle* tri = (Triangle*) cur.leaf(items); PrimitiveIntersector8::intersect(valid_leaf,pre,ray,tri,items,bvh->geometry); ray_tfar = select(valid_leaf,ray.tfar,ray_tfar); } AVX_ZERO_UPPER(); #endif }
/*! Closest-hit query for a packet of 8 rays against a BVH2.
 *
 *  \param This     intersector instance providing the BVH (This->bvh)
 *  \param ray      ray packet; ray.tfar of inactive lanes is overwritten with
 *                  neg_inf on entry, and the packet is passed to
 *                  TriangleIntersector::intersect() for every leaf triangle
 *  \param valid_i  mask of the active rays
 */
void BVH2Intersector8Chunk<TriangleIntersector>::intersect(const BVH2Intersector8Chunk* This, Ray8& ray, const __m256 valid_i)
{
  avxb valid = valid_i;
  const BVH2* bvh = This->bvh;
  STAT3(normal.travs,1,popcnt(valid),8);

  /* a postponed subtree plus the per-lane distance reported by intersectBox
     at the time it was postponed */
  struct Postponed { NodeRef ptr; avxf dist; };
  Postponed stack[1+BVH2::maxDepth];  //!< subtrees that still have to be visited
  Postponed* top = stack;             //!< first free stack slot
  NodeRef cur = bvh->root;            //!< node that is currently processed

  /* inactive rays get tfar = -inf so that they fail every box test */
  const avx3f rdir = rcp_safe(ray.dir);
  ray.tfar = select(valid,ray.tfar,avxf(neg_inf));

  for (;;)
  {
    /* descend until a leaf is reached or both children are missed */
    bool reachedLeaf = true;
    while (likely(cur.isNode()))
    {
      STAT3(normal.trav_nodes,1,popcnt(valid),8);

      /* test the packet against the boxes of both children */
      const Node* node = cur.node();
      avxf dist0; size_t hit0 = intersectBox(ray.org,rdir,ray.tnear,ray.tfar,node,0,dist0);
      avxf dist1; size_t hit1 = intersectBox(ray.org,rdir,ray.tnear,ray.tfar,node,1,dist1);

      if (likely(hit0 != 0 && hit1 != 0))
      {
        /* both hit: continue with child 0 if any active lane reports a
           smaller distance for it, otherwise with child 1; the other child
           is postponed */
        if (any(valid & (dist0 < dist1))) {
          top->ptr  = node->child(1);
          top->dist = dist1;
          top++;
          cur = node->child(0);
        }
        else {
          top->ptr  = node->child(0);
          top->dist = dist0;
          top++;
          cur = node->child(1);
        }
      }
      else if (likely(hit0 != 0)) {
        cur = node->child(0);
      }
      else if (likely(hit1 != 0)) {
        cur = node->child(1);
      }
      else {
        /* nothing hit below this node: skip the leaf code and pop */
        reachedLeaf = false;
        break;
      }
    }

    /* leaf: intersect the packet with every triangle */
    if (reachedLeaf)
    {
      STAT3(normal.trav_leaves,1,popcnt(valid),8);
      size_t num; Triangle* tri = (Triangle*) cur.leaf(NULL,num);
      for (size_t i=0; i<num; i++) {
        TriangleIntersector::intersect(valid,ray,tri[i],bvh->vertices);
      }
    }

    /* pop postponed subtrees, dropping those whose recorded distance exceeds
       the current tfar in all lanes; finished once the stack is empty */
    do {
      if (unlikely(top == stack)) return;
      --top;
      cur = top->ptr;
    } while (unlikely(all(top->dist > ray.tfar)));
  }
}
void BVH8iIntersector8Hybrid<TriangleIntersector8>::occluded(avxb* valid_i, BVH8i* bvh, Ray8& ray) { /* load ray */ const avxb valid = *valid_i; avxb terminated = !valid; avx3f ray_org = ray.org, ray_dir = ray.dir; avxf ray_tnear = ray.tnear, ray_tfar = ray.tfar; #if defined(__FIX_RAYS__) const avxf float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,avx3f(-float_range),avx3f(+float_range)); ray_dir = clamp(ray_dir,avx3f(-float_range),avx3f(+float_range)); ray_tnear = max(ray_tnear,FLT_MIN); ray_tfar = min(ray_tfar,float(inf)); #endif const avx3f rdir = rcp_safe(ray_dir); const avx3f org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid,ray_tnear,avxf(pos_inf)); ray_tfar = select(valid,ray_tfar ,avxf(neg_inf)); const avxf inf = avxf(pos_inf); /* compute near/far per ray */ avx3i nearXYZ; nearXYZ.x = select(rdir.x >= 0.0f,avxi(0*(int)sizeof(avxf)),avxi(1*(int)sizeof(avxf))); nearXYZ.y = select(rdir.y >= 0.0f,avxi(2*(int)sizeof(avxf)),avxi(3*(int)sizeof(avxf))); nearXYZ.z = select(rdir.z >= 0.0f,avxi(4*(int)sizeof(avxf)),avxi(5*(int)sizeof(avxf))); /* allocate stack and push root node */ avxf stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH4i::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; avxf* __restrict__ sptr_near = stack_near + 2; const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr(); while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef curNode = *sptr_node; if (unlikely(curNode == BVH4i::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ avxf curDist = *sptr_near; const avxb active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || 
defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { if (occluded1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar,nearXYZ)) terminated[i] = -1; } if (all(terminated)) break; ray_tfar = select(terminated,avxf(neg_inf),ray_tfar); continue; } #endif while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf())) break; const avxb valid_node = ray_tfar > curDist; STAT3(shadow.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = (Node*)curNode.node(nodes); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; curNode = *sptr_node; curDist = *sptr_near; for (unsigned i=0; i<8; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4i::emptyNode)) break; #if defined(__AVX2__) const avxf lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const avxf lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const avxf lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const avxf lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const avxf lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const avxf lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const avxf lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const avxf lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const avxb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const avxf lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const avxf lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const avxf lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const avxf lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const avxf lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const avxf lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; const avxf lnearP = max(max(min(lclipMinX, 
lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const avxf lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const avxb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4i::emptyNode); const avxf childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = curNode; *(sptr_near-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ const avxb valid_leaf = ray_tfar > curDist; STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8); size_t items; const Triangle* prim = (Triangle*) curNode.leaf(accel,items); terminated |= TriangleIntersector8::occluded(!terminated,ray,prim,items,bvh->geometry); if (all(terminated)) break; ray_tfar = select(terminated,avxf(neg_inf),ray_tfar); } store8i(valid & terminated,&ray.geomID,0); AVX_ZERO_UPPER(); }
void BVH4Intersector8Chunk<types, robust, PrimitiveIntersector8>::intersect(avxb* valid_i, BVH4* bvh, Ray8& ray) { /* load ray */ const avxb valid0 = *valid_i; const avx3f rdir = rcp_safe(ray.dir); const avx3f org(ray.org), org_rdir = org * rdir; avxf ray_tnear = select(valid0,ray.tnear,pos_inf); avxf ray_tfar = select(valid0,ray.tfar ,neg_inf); const avxf inf = avxf(pos_inf); Precalculations pre(valid0,ray); /* allocate stack and push root node */ avxf stack_near[stackSize]; NodeRef stack_node[stackSize]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSize; NodeRef* __restrict__ sptr_node = stack_node + 2; avxf* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ avxf curDist = *sptr_near; if (unlikely(none(ray_tfar > curDist))) continue; while (1) { /* process normal nodes */ if (likely((types & 0x1) && cur.isNode())) { const avxb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = cur.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; avxf lnearP; const avxb lhit = node->intersect8<robust>(i,org,rdir,org_rdir,ray_tnear,ray_tfar,lnearP); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4::emptyNode); const avxf childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push 
cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* process motion blur nodes */ else if (likely((types & 0x10) && cur.isNodeMB())) { const avxb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),8); const BVH4::NodeMB* __restrict__ const node = cur.nodeMB(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->child(i); if (unlikely(child == BVH4::emptyNode)) break; avxf lnearP; const avxb lhit = node->intersect(i,org,rdir,org_rdir,ray_tnear,ray_tfar,ray.time,lnearP); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4::emptyNode); const avxf childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } else break; }
/*! Closest-hit traversal of a BVH4 for a single ray. The lower and upper
 *  bounds of the four children are processed in one 8-wide register per axis
 *  (loaded starting at node->lower_x / lower_y / lower_z).
 *
 *  swapX, swapY and swapZ are not declared inside this block (presumably
 *  compile-time flags of the enclosing template - confirm); they select the
 *  sign flip applied to the reciprocal direction and whether the two register
 *  halves are exchanged.
 *
 *  \param bvh  hierarchy to traverse; traversal starts at bvh->root
 *  \param ray  ray handed to TriangleIntersector::intersect() for every leaf
 *              triangle; ray.tfar is re-read after each leaf
 */
__forceinline void intersectT(const BVH4* bvh, Ray& ray)
{
  typedef typename TriangleIntersector::Triangle Triangle;
  typedef StackItemT<size_t> StackItem;
  typedef typename BVH4::NodeRef NodeRef;
  typedef typename BVH4::Node Node;

  /*! stack state: slot 0 holds the root with distance -inf */
  StackItem stack[1+3*BVH4::maxDepth];  //!< nodes that still have to be visited
  StackItem* top = stack+1;             //!< first free stack slot
  stack[0].ptr  = bvh->root;
  stack[0].dist = neg_inf;

  /*! load the ray into SIMD registers */
  const avxf pos_neg = avxf(ssef(+0.0f),ssef(-0.0f));
  const avxf neg_pos = avxf(ssef(-0.0f),ssef(+0.0f));
  const avxf flipSignX = swapX ? neg_pos : pos_neg;
  const avxf flipSignY = swapY ? neg_pos : pos_neg;
  const avxf flipSignZ = swapZ ? neg_pos : pos_neg;
  const Vector3f ray_rdir = rcp_safe(ray.dir);
  const avx3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const avx3f rdir(ray_rdir.x^flipSignX,ray_rdir.y^flipSignY,ray_rdir.z^flipSignZ);
  const avx3f org_rdir(avx3f(ray.org.x,ray.org.y,ray.org.z)*rdir);

  /* low half: tnear, high half: negated tfar (refreshed after every leaf) */
  avxf rayNearFar(ssef(ray.tnear),-ssef(ray.tfar));

  const void* nodePtr = bvh->nodePtr();
  const void* triPtr  = bvh->triPtr();

  /* pop loop */
  for (;;)
  {
    /*! pop next node */
    if (unlikely(top == stack)) break;
    top--;
    NodeRef cur = NodeRef(top->ptr);

    /*! if popped node is too far, pop next one */
    if (unlikely(top->dist > ray.tfar))
      continue;

    /* downtraversal loop; missedAll is set when no child box is hit, in
       which case the leaf code is skipped and the next node is popped */
    bool missedAll = false;
    for (;;)
    {
      /*! stop if we found a leaf */
      if (unlikely(cur.isLeaf())) break;
      STAT3(normal.trav_nodes,1,1,1);

      /*! single ray intersection with 4 boxes */
      const Node* node = cur.node(nodePtr);
#if defined (__AVX2__) || defined(__MIC__)
      const avxf tLowerUpperX = msub(avxf::load(&node->lower_x), rdir.x, org_rdir.x);
      const avxf tLowerUpperY = msub(avxf::load(&node->lower_y), rdir.y, org_rdir.y);
      const avxf tLowerUpperZ = msub(avxf::load(&node->lower_z), rdir.z, org_rdir.z);
#else
      const avxf tLowerUpperX = (norg.x + avxf::load(&node->lower_x)) * rdir.x;
      const avxf tLowerUpperY = (norg.y + avxf::load(&node->lower_y)) * rdir.y;
      const avxf tLowerUpperZ = (norg.z + avxf::load(&node->lower_z)) * rdir.z;
#endif
      const avxf tNearFarX = swapX ? shuffle<1,0>(tLowerUpperX) : tLowerUpperX;
      const avxf tNearFarY = swapY ? shuffle<1,0>(tLowerUpperY) : tLowerUpperY;
      const avxf tNearFarZ = swapZ ? shuffle<1,0>(tLowerUpperZ) : tLowerUpperZ;
      const avxf tNearFar  = max(tNearFarX,tNearFarY,tNearFarZ,rayNearFar);
      const ssef tNear = extract<0>(tNearFar);
      const ssef tFar  = extract<1>(tNearFar);
      size_t hits = movemask(-tNear >= tFar);

      /*! if no child is hit, pop next node */
      if (unlikely(hits == 0)) {
        missedAll = true;
        break;
      }

      /*! one child is hit, continue with that child */
      size_t slot = __bsf(hits);
      hits = __btc(hits,slot);
      if (likely(hits == 0)) {
        cur = node->child(slot);
        continue;
      }

      /*! two children are hit, push far child, and continue with closer child */
      NodeRef c0 = node->child(slot);
      const float d0 = tNear[slot];
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef c1 = node->child(slot);
      const float d1 = tNear[slot];
      if (likely(hits == 0))
      {
        if (d0 < d1) {
          top->ptr = c1; top->dist = d1; top++;
          cur = c0;
        }
        else {
          top->ptr = c0; top->dist = d0; top++;
          cur = c1;
        }
        continue;
      }

      /*! Here starts the slow path for 3 or 4 hit children. We push
       *  all nodes onto the stack to sort them there. */
      top->ptr = c0; top->dist = d0; top++;
      top->ptr = c1; top->dist = d1; top++;

      /*! three children are hit, push all onto stack and sort 3 stack items,
       *  continue with closest child */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef c = node->child(slot);
      float d = tNear[slot];
      top->ptr = c; top->dist = d; top++;
      if (likely(hits == 0)) {
        sort(top[-1],top[-2],top[-3]);
        cur = (NodeRef) top[-1].ptr;
        top--;
        continue;
      }

      /*! four children are hit, push all onto stack and sort 4 stack items,
       *  continue with closest child */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      c = node->child(slot);
      d = tNear[slot];
      top->ptr = c; top->dist = d; top++;
      sort(top[-1],top[-2],top[-3],top[-4]);
      cur = (NodeRef) top[-1].ptr;
      top--;
    }
    if (missedAll)
      continue;

    /*! this is a leaf node */
    STAT3(normal.trav_leaves,1,1,1);
    size_t num; Triangle* tri = (Triangle*) cur.leaf(triPtr,num);
    for (size_t i=0; i<num; i++)
      TriangleIntersector::intersect(ray,tri[i],bvh->vertices);

    /* reload the negated tfar of the ray into the high half */
    rayNearFar = insert<1>(rayNearFar,-ssef(ray.tfar));
  }
}
/*! Any-hit (occlusion) traversal of a BVH4 for a single ray. The lower and
 *  upper bounds of the four children are processed in one 8-wide register per
 *  axis (loaded starting at node->lower_x / lower_y / lower_z).
 *
 *  swapX, swapY and swapZ are not declared inside this block (presumably
 *  compile-time flags of the enclosing template - confirm).
 *
 *  \param bvh  hierarchy to traverse; traversal starts at bvh->root
 *  \param ray  ray to test
 *  \return     true as soon as TriangleIntersector::occluded() reports a hit
 *              for a leaf triangle, false when the stack runs empty
 */
__forceinline bool occludedT(const BVH4* bvh, Ray& ray)
{
  typedef typename TriangleIntersector::Triangle Triangle;
  typedef StackItemT<size_t> StackItem;
  typedef typename BVH4::NodeRef NodeRef;
  typedef typename BVH4::Node Node;

  /*! stack state: slot 0 holds the root */
  NodeRef stack[1+3*BVH4::maxDepth];  //!< nodes that still have to be visited
  NodeRef* top = stack+1;             //!< first free stack slot
  stack[0] = bvh->root;

  /*! load the ray into SIMD registers */
  const avxf pos_neg = avxf(ssef(+0.0f),ssef(-0.0f));
  const avxf neg_pos = avxf(ssef(-0.0f),ssef(+0.0f));
  const avxf flipSignX = swapX ? neg_pos : pos_neg;
  const avxf flipSignY = swapY ? neg_pos : pos_neg;
  const avxf flipSignZ = swapZ ? neg_pos : pos_neg;
  const avx3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const Vector3f ray_rdir = rcp_safe(ray.dir);
  const avx3f rdir(ray_rdir.x^flipSignX,ray_rdir.y^flipSignY,ray_rdir.z^flipSignZ);
  const avx3f org_rdir(avx3f(ray.org.x,ray.org.y,ray.org.z)*rdir);

  /* low half: tnear, high half: negated tfar */
  const avxf rayNearFar(ssef(ray.tnear),-ssef(ray.tfar));

  const void* nodePtr = bvh->nodePtr();
  const void* triPtr  = bvh->triPtr();

  /* pop loop */
  for (;;)
  {
    /*! pop next node */
    if (unlikely(top == stack)) break;
    top--;
    NodeRef cur = (NodeRef) *top;

    /* downtraversal loop; missedAll is set when no child box is hit, in
       which case the leaf code is skipped and the next node is popped */
    bool missedAll = false;
    for (;;)
    {
      /*! stop if we found a leaf */
      if (unlikely(cur.isLeaf())) break;
      STAT3(shadow.trav_nodes,1,1,1);

      /*! single ray intersection with 4 boxes */
      const Node* node = cur.node(nodePtr);
#if defined (__AVX2__) || defined(__MIC__)
      const avxf tLowerUpperX = msub(avxf::load(&node->lower_x), rdir.x, org_rdir.x);
      const avxf tLowerUpperY = msub(avxf::load(&node->lower_y), rdir.y, org_rdir.y);
      const avxf tLowerUpperZ = msub(avxf::load(&node->lower_z), rdir.z, org_rdir.z);
#else
      const avxf tLowerUpperX = (norg.x + avxf::load(&node->lower_x)) * rdir.x;
      const avxf tLowerUpperY = (norg.y + avxf::load(&node->lower_y)) * rdir.y;
      const avxf tLowerUpperZ = (norg.z + avxf::load(&node->lower_z)) * rdir.z;
#endif
      const avxf tNearFarX = swapX ? shuffle<1,0>(tLowerUpperX) : tLowerUpperX;
      const avxf tNearFarY = swapY ? shuffle<1,0>(tLowerUpperY) : tLowerUpperY;
      const avxf tNearFarZ = swapZ ? shuffle<1,0>(tLowerUpperZ) : tLowerUpperZ;
      const avxf tNearFar  = max(tNearFarX,tNearFarY,tNearFarZ,rayNearFar);
      const ssef tNear = extract<0>(tNearFar);
      const ssef tFar  = extract<1>(tNearFar);
      size_t hits = movemask(-tNear >= tFar);

      /*! if no child is hit, pop next node */
      if (unlikely(hits == 0)) {
        missedAll = true;
        break;
      }

      /*! one child is hit, continue with that child */
      size_t slot = __bsf(hits);
      hits = __btc(hits,slot);
      if (likely(hits == 0)) {
        cur = node->child(slot);
        continue;
      }

      /*! two children are hit, push far child, and continue with closer child */
      NodeRef c0 = node->child(slot);
      const float d0 = tNear[slot];
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef c1 = node->child(slot);
      const float d1 = tNear[slot];
      if (likely(hits == 0))
      {
        if (d0 < d1) {
          *top = c1; top++;
          cur = c0;
        }
        else {
          *top = c0; top++;
          cur = c1;
        }
        continue;
      }

      /* more than two hits: the first two children are pushed unsorted */
      *top = c0; top++;
      *top = c1; top++;

      /*! three children are hit: the third one is pushed and, if it is the
       *  last hit, popped again right away to continue with it */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      cur = node->child(slot);
      *top = cur; top++;
      if (likely(hits == 0)) {
        top--;
        continue;
      }

      /*! four children are hit: the third stays on the stack and traversal
       *  continues with child 3 */
      cur = node->child(3);
    }
    if (missedAll)
      continue;

    /*! this is a leaf node */
    STAT3(shadow.trav_leaves,1,1,1);
    size_t num; Triangle* tri = (Triangle*) cur.leaf(triPtr,num);
    for (size_t i=0; i<num; i++)
    {
      if (TriangleIntersector::occluded(ray,tri[i],bvh->vertices)) {
        AVX_ZERO_UPPER();
        return true;
      }
    }
  }

  AVX_ZERO_UPPER();
  return false;
}
void BVH4Intersector8Hybrid<PrimitiveIntersector8>::intersect(avxb* valid_i, BVH4* bvh, Ray8& ray) { /* load ray */ const avxb valid0 = *valid_i; avx3f ray_org = ray.org, ray_dir = ray.dir; avxf ray_tnear = ray.tnear, ray_tfar = ray.tfar; #if defined(__FIX_RAYS__) const avxf float_range = 1.8E19; ray_org = clamp(ray_org,avx3f(-float_range),avx3f(+float_range)); ray_dir = clamp(ray_dir,avx3f(-float_range),avx3f(+float_range)); ray_tnear = max(ray_tnear,FLT_MIN); ray_tfar = min(ray_tfar,float(inf)); #endif const avx3f rdir = rcp_safe(ray_dir); const avx3f org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid0,ray_tnear,avxf(pos_inf)); ray_tfar = select(valid0,ray_tfar ,avxf(neg_inf)); const avxf inf = avxf(pos_inf); /* allocate stack and push root node */ avxf stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; avxf* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef curNode = *sptr_node; if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ avxf curDist = *sptr_near; const avxb active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { intersect1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar); } ray_tfar = ray.tfar; continue; } #endif while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf())) break; const avxb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),8); 
const Node* __restrict__ const node = curNode.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; curNode = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<4; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; #if defined(__AVX2__) const avxf lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const avxf lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const avxf lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const avxf lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const avxf lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const avxf lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const avxf lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const avxf lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const avxb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const avxf lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const avxf lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const avxf lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const avxf lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const avxf lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const avxf lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; const avxf lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const avxf lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const avxb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); const avxf childDist = select(lhit,lnearP,inf); const NodeRef child = node->children[i]; assert(child != BVH4::emptyNode); 
sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = curNode; *(sptr_near-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* return if stack is empty */ if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ const avxb valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(valid_leaf),8); size_t items; const Primitive* prim = (Primitive*) curNode.leaf(items); PrimitiveIntersector8::intersect(valid_leaf,ray,prim,items,bvh->geometry); ray_tfar = select(valid_leaf,ray.tfar,ray_tfar); } AVX_ZERO_UPPER(); }
/*! Closest-hit query for a packet of 8 rays against a BVH4. All rays
 *  traverse together using an explicit stack of (node, per-lane entry
 *  distance) pairs; the node reference with value 1 serves as the sentinel
 *  that marks the stack bottom.
 *
 *  \param This     intersector instance providing the BVH (This->bvh)
 *  \param ray      ray packet; ray.tfar of inactive lanes is overwritten with
 *                  neg_inf on entry, and the packet is passed to
 *                  TriangleIntersector8::intersect() for every leaf triangle
 *  \param valid_i  mask of the active rays
 */
void BVH4Intersector8Chunk<TriangleIntersector8>::intersect(const BVH4Intersector8Chunk* This, Ray8& ray, const __m256 valid_i)
{
  avxb valid = valid_i;
  NodeRef invalid = (NodeRef)1;
  const BVH4* bvh = This->bvh;

  /* load ray into registers; inactive rays get the empty interval [+inf,-inf] */
  avxf ray_near = select(valid,ray.tnear,pos_inf);
  avxf ray_far  = select(valid,ray.tfar ,neg_inf);
  avx3f rdir = rcp_safe(ray.dir);
  ray.tfar = ray_far;

  /* allocate stack and push root node above the sentinel */
  NodeRef stack_node[3*BVH4::maxDepth+1];
  avxf    stack_near[3*BVH4::maxDepth+1];
  stack_node[0] = invalid;
  stack_near[0] = inf;
  stack_node[1] = bvh->root;
  stack_near[1] = ray_near;
  NodeRef* nodeTop = stack_node+2;
  avxf   * nearTop = stack_near+2;

  for (;;)
  {
    /* take the next entry from the stack */
    nodeTop--;
    nearTop--;
    avxf curDist = *nearTop;
    NodeRef curNode = *nodeTop;
    if (unlikely(curNode == invalid))
      break;

    /* drop the entry if it lies behind the closest hit of every lane */
    const avxb m_dist = curDist < ray_far;
    if (unlikely(none(m_dist)))
      continue;

    for (;;)
    {
      /* stop descending at a leaf */
      if (unlikely(curNode.isLeaf()))
        break;

      const Node* const node = curNode.node(bvh->nodePtr());

      /* speculatively pop the next entry; it is pushed back below when a
         child of this node is entered instead */
      nodeTop--;
      nearTop--;
      curNode = *nodeTop;
      curDist = *nearTop;

      for (unsigned i=0;i<4;i++)
      {
        /* distances to both planes of each slab of child i */
        const avxf tx0 = (avxf(node->lower_x[i]) - ray.org.x) * rdir.x;
        const avxf tx1 = (avxf(node->upper_x[i]) - ray.org.x) * rdir.x;
        const avxf ty0 = (avxf(node->lower_y[i]) - ray.org.y) * rdir.y;
        const avxf ty1 = (avxf(node->upper_y[i]) - ray.org.y) * rdir.y;
        const avxf tz0 = (avxf(node->lower_z[i]) - ray.org.z) * rdir.z;
        const avxf tz1 = (avxf(node->upper_z[i]) - ray.org.z) * rdir.z;

        /* latest entry and earliest exit, clipped to the ray interval */
        const avxf tEntry = max(min(tx0,tx1),min(ty0,ty1),min(tz0,tz1),ray_near);
        const avxf tExit  = min(max(tx0,tx1),max(ty0,ty1),max(tz0,tz1),ray_far );
        const avxb mhit = tEntry <= tExit;

        /* lanes that miss the box get distance inf */
        const avxf childDist = select(mhit,tEntry,inf);
        const avxb closer = childDist < curDist;

        /* a hit child either becomes the node to continue with (if some
           lane sees it closer than the current candidate) or is pushed */
        if (likely(any(mhit)))
        {
          const NodeRef child = node->child(i);

          if (any(closer)) {
            /* push the current candidate and continue with the child */
            *nodeTop = curNode;
            *nearTop = curDist;
            curDist = childDist;
            curNode = child;
          }
          else {
            /* push the child for later */
            *nodeTop = child;
            *nearTop = childDist;
          }
          nodeTop++;
          nearTop++;
        }
      }
    }

    /* the sentinel surfaced: nothing left to traverse */
    if (unlikely(curNode == invalid))
      break;

    /* decode leaf node */
    size_t num; Triangle* tri = (Triangle*) curNode.leaf(bvh->triPtr(),num);

    /* intersect triangles */
    for (size_t i=0; i<num; i++)
      TriangleIntersector8::intersect(valid,ray,tri[i],bvh->vertices);

    /* pick up the hit distances stored in the packet */
    ray_far = ray.tfar;
  }
}