__forceinline void BVH8Intersector8Hybrid<PrimitiveIntersector8>::intersect1(const BVH8* bvh, NodeRef root, const size_t k, Precalculations& pre, Ray8& ray,const Vec3f8 &ray_org, const Vec3f8 &ray_dir, const Vec3f8 &ray_rdir, const float8 &ray_tnear, const float8 &ray_tfar, const Vec3i8& nearXYZ) { /*! stack state */ StackItemT<NodeRef> stack[stackSizeSingle]; //!< stack of nodes StackItemT<NodeRef>* stackPtr = stack+1; //!< current stack pointer StackItemT<NodeRef>* stackEnd = stack+stackSizeSingle; stack[0].ptr = root; stack[0].dist = neg_inf; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = nearXYZ.x[k]; const size_t nearY = nearXYZ.y[k]; const size_t nearZ = nearXYZ.z[k]; /*! load the ray into SIMD registers */ const Vec3f8 org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const Vec3f8 rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const Vec3f8 norg = -org, org_rdir(org*rdir); float8 rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k])) continue; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ sizeof(float8), farY = nearY ^ sizeof(float8), farZ = nearZ ^ sizeof(float8); #if defined (__AVX2__) const float8 tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const float8 tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const float8 tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const float8 tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const float8 tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const float8 tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const float8 tNearX = (norg.x + load8f((const char*)node+nearX)) * rdir.x; const float8 tNearY = (norg.y + load8f((const char*)node+nearY)) * rdir.y; const float8 tNearZ = (norg.z + load8f((const char*)node+nearZ)) * rdir.z; const float8 tFarX = (norg.x + load8f((const char*)node+farX )) * rdir.x; const float8 tFarY = (norg.y + load8f((const char*)node+farY )) * rdir.y; const float8 tFarZ = (norg.z + load8f((const char*)node+farZ )) * rdir.z; #endif #if defined(__AVX2__) const float8 tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const float8 tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const bool8 vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xff; #else const float8 tNear = max(tNearX,tNearY,tNearZ,rayNear); const float8 tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const bool8 vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); cur.prefetch(); assert(cur != BVH8::emptyNode); continue; } /*! two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); c0.prefetch(); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); c1.prefetch(); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH8::emptyNode); assert(c1 != BVH8::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); c.prefetch(); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH8::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ r = __bscf(mask); c = node->child(r); c.prefetch(); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! fallback case if more than 4 children are hit */ while (1) { r = __bscf(mask); assert(stackPtr < stackEnd); c = node->child(r); c.prefetch(); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! this is a leaf node */ assert(cur != BVH8::emptyNode); STAT3(normal.trav_leaves,1,1,1); size_t num; Triangle* prim = (Triangle*) cur.leaf(num); PrimitiveIntersector8::intersect(pre,ray,k,prim,num,bvh->scene); rayFar = ray.tfar[k]; } }
void BVH4Intersector8Hybrid<PrimitiveIntersector8>::intersect(avxb* valid_i, BVH4* bvh, Ray8& ray) { /* load ray */ const avxb valid0 = *valid_i; avx3f ray_org = ray.org; avx3f ray_dir = ray.dir; avxf ray_tnear = ray.tnear, ray_tfar = ray.tfar; #if defined(__FIX_RAYS__) const avxf float_range = 1.8E19; ray_org = clamp(ray_org,avx3f(-float_range),avx3f(+float_range)); ray_dir = clamp(ray_dir,avx3f(-float_range),avx3f(+float_range)); ray_tnear = max(ray_tnear,FLT_MIN); ray_tfar = min(ray_tfar,float(inf)); #endif const avx3f rdir = rcp_safe(ray_dir); const avx3f org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid0,ray_tnear,avxf(pos_inf)); ray_tfar = select(valid0,ray_tfar ,avxf(neg_inf)); const avxf inf = avxf(pos_inf); /* compute near/far per ray */ avx3i nearXYZ; nearXYZ.x = select(rdir.x >= 0.0f,avxi(0*(int)sizeof(ssef)),avxi(1*(int)sizeof(ssef))); nearXYZ.y = select(rdir.y >= 0.0f,avxi(2*(int)sizeof(ssef)),avxi(3*(int)sizeof(ssef))); nearXYZ.z = select(rdir.z >= 0.0f,avxi(4*(int)sizeof(ssef)),avxi(5*(int)sizeof(ssef))); /* allocate stack and push root node */ avxf stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; avxf* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef curNode = *sptr_node; if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ avxf curDist = *sptr_near; const avxb active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { intersect1(bvh, curNode, i, ray, ray_org, ray_dir, rdir, ray_tnear, ray_tfar, nearXYZ); } ray_tfar = ray.tfar; continue; } #endif while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf())) break; const avxb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = curNode.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; curNode = *sptr_node; curDist = *sptr_near; for (size_t i=0; i<4; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; #if defined(__AVX2__) const avxf lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const avxf lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const avxf lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const avxf lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const avxf lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const avxf lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const avxf lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const avxf lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const avxb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const avxf lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const avxf lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const avxf lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const avxf lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const avxf lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const avxf lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; const avxf lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const avxf lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const avxb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); const avxf childDist = select(lhit,lnearP,inf); const NodeRef child = node->children[i]; assert(child != BVH4::emptyNode); child.prefetch(); /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *sptr_node = curNode; *sptr_near = curDist; curDist = childDist; curNode = child; sptr_node++; sptr_near++; } /* push hit child onto stack */ else { *sptr_node = child; *sptr_near = childDist; sptr_node++; sptr_near++; } } } } /* return if stack is empty */ if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ const avxb valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(valid_leaf),8); size_t items; const Primitive* prim = (Primitive*) curNode.leaf(items); PrimitiveIntersector8::intersect(valid_leaf,ray,prim,items,bvh->geometry); ray_tfar = select(valid_leaf,ray.tfar,ray_tfar); } AVX_ZERO_UPPER(); }
__forceinline bool BVH8Intersector8Hybrid<PrimitiveIntersector8>::occluded1(const BVH8* bvh, NodeRef root, const size_t k, Precalculations& pre, Ray8& ray,const Vec3f8 &ray_org, const Vec3f8 &ray_dir, const Vec3f8 &ray_rdir, const float8 &ray_tnear, const float8 &ray_tfar, const Vec3i8& nearXYZ) { /*! stack state */ NodeRef stack[stackSizeSingle]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSizeSingle; stack[0] = root; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = nearXYZ.x[k]; const size_t nearY = nearXYZ.y[k]; const size_t nearZ = nearXYZ.z[k]; /*! load the ray into SIMD registers */ const Vec3f8 org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const Vec3f8 rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const Vec3f8 norg = -org, org_rdir(org*rdir); const float8 rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ sizeof(float8), farY = nearY ^ sizeof(float8), farZ = nearZ ^ sizeof(float8); #if defined (__AVX2__) const float8 tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const float8 tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const float8 tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const float8 tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const float8 tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const float8 tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const float8 tNearX = (norg.x + load8f((const char*)node+nearX)) * rdir.x; const float8 tNearY = (norg.y + load8f((const char*)node+nearY)) * rdir.y; const float8 tNearZ = (norg.z + load8f((const char*)node+nearZ)) * rdir.z; const float8 tFarX = (norg.x + load8f((const char*)node+farX )) * rdir.x; const float8 tFarY = (norg.y + load8f((const char*)node+farY )) * rdir.y; const float8 tFarZ = (norg.z + load8f((const char*)node+farZ )) * rdir.z; #endif #if defined(__AVX2__) const float8 tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const float8 tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const bool8 vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xff; #else const float8 tNear = max(tNearX,tNearY,tNearZ,rayNear); const float8 tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const bool8 vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); cur.prefetch(); assert(cur != BVH8::emptyNode); continue; } /*! two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); c0.prefetch(); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); c1.prefetch(); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH8::emptyNode); assert(c1 != BVH8::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); cur.prefetch(); *stackPtr = cur; stackPtr++; if (likely(mask == 0)) { stackPtr--; continue; } /*! process more than three children */ while(1) { r = __bscf(mask); NodeRef c = node->child(r); c.prefetch(); *stackPtr = c; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1]; stackPtr--; } /*! this is a leaf node */ assert(cur != BVH8::emptyNode); STAT3(shadow.trav_leaves,1,1,1); size_t num; Triangle* prim = (Triangle*) cur.leaf(num); if (PrimitiveIntersector8::occluded(pre,ray,k,prim,num,bvh->scene)) { //ray.geomID = 0; //break; return true; } } return false; }
void BVH8Intersector4Hybrid<PrimitiveIntersector4>::occluded(sseb* valid_i, BVH8* bvh, Ray4& ray) { /* load ray */ const sseb valid = *valid_i; sseb terminated = !valid; sse3f ray_org = ray.org, ray_dir = ray.dir; ssef ray_tnear = ray.tnear, ray_tfar = ray.tfar; const sse3f rdir = rcp_safe(ray_dir); const sse3f org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid,ray_tnear,ssef(pos_inf)); ray_tfar = select(valid,ray_tfar ,ssef(neg_inf)); const ssef inf = ssef(pos_inf); Precalculations pre(valid,ray); /* allocate stack and push root node */ ssef stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH8::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; ssef* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH8::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ ssef curDist = *sptr_near; const sseb active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { if (occluded1(bvh,cur,i,pre,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar)) terminated[i] = -1; } if (all(terminated)) break; ray_tfar = select(terminated,ssef(neg_inf),ray_tfar); continue; } #endif while (1) { /* test if this is a leaf node */ if (unlikely(cur.isLeaf())) break; const sseb valid_node = ray_tfar > curDist; STAT3(shadow.trav_nodes,1,popcnt(valid_node),4); const Node* __restrict__ const node = cur.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH8::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH8::emptyNode)) break; #if defined(__AVX2__) const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); #else const ssef lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const ssef lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const ssef lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const ssef lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const ssef lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const ssef lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; #endif #if defined(__SSE4_1__) const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const ssef lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const sseb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const ssef lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const sseb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH8::emptyNode); child.prefetch(); const ssef childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* return if stack is empty */ if (unlikely(cur == BVH8::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ assert(cur != BVH8::emptyNode); const sseb valid_leaf = ray_tfar > curDist; STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),4); size_t items; const Primitive* prim = (Primitive*) cur.leaf(items); terminated |= PrimitiveIntersector4::occluded(!terminated,pre,ray,prim,items,bvh->scene); if (all(terminated)) break; ray_tfar = select(terminated,ssef(neg_inf),ray_tfar); } store4i(valid & terminated,&ray.geomID,0); AVX_ZERO_UPPER(); }
void BVH8Intersector1<robust,PrimitiveIntersector>::intersect(const BVH8* bvh, Ray& ray) { /*! perform per ray precalculations required by the primitive intersector */ Precalculations pre(ray,bvh); /*! stack state */ StackItemT<NodeRef> stack[stackSize]; //!< stack of nodes StackItemT<NodeRef>* stackPtr = stack+1; //!< current stack pointer StackItemT<NodeRef>* stackEnd = stack+stackSize; stack[0].ptr = bvh->root; stack[0].dist = neg_inf; /* filter out invalid rays */ #if defined(RTCORE_IGNORE_INVALID_RAYS) if (!ray.valid()) return; #endif /* verify correct input */ assert(ray.tnear > -FLT_MIN); //assert(!(types & BVH4::FLAG_NODE_MB) || (ray.time >= 0.0f && ray.time <= 1.0f)); /*! load the ray into SIMD registers */ const Vec3f8 norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const Vec3f8 rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const Vec3f8 org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const float8 ray_near(ray.tnear); float8 ray_far(ray.tfar); /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_rdir.x >= 0.0f ? 0*sizeof(float8) : 1*sizeof(float8); const size_t nearY = ray_rdir.y >= 0.0f ? 2*sizeof(float8) : 3*sizeof(float8); const size_t nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(float8) : 5*sizeof(float8); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(*(float*)&stackPtr->dist > ray.tfar)) continue; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ sizeof(float8), farY = nearY ^ sizeof(float8), farZ = nearZ ^ sizeof(float8); #if defined (__AVX2__) const float8 tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const float8 tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const float8 tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const float8 tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const float8 tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const float8 tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const float8 tNearX = (norg.x + load8f((const char*)node+nearX)) * rdir.x; const float8 tNearY = (norg.y + load8f((const char*)node+nearY)) * rdir.y; const float8 tNearZ = (norg.z + load8f((const char*)node+nearZ)) * rdir.z; const float8 tFarX = (norg.x + load8f((const char*)node+farX )) * rdir.x; const float8 tFarY = (norg.y + load8f((const char*)node+farY )) * rdir.y; const float8 tFarZ = (norg.z + load8f((const char*)node+farZ )) * rdir.z; #endif const float round_down = 1.0f-2.0f*float(ulp); const float round_up = 1.0f+2.0f*float(ulp); #if defined(__AVX2__) const float8 tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const float8 tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const bool8 vmask = robust ? (round_down*tNear > round_up*tFar) : cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xff; #else const float8 tNear = max(tNearX,tNearY,tNearZ,ray_near); const float8 tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const bool8 vmask = robust ? (round_down*tNear > round_up*tFar) : tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); cur.prefetch(); assert(cur != BVH8::emptyNode); continue; } /*! two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); c0.prefetch(); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); c1.prefetch(); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH8::emptyNode); assert(c1 != BVH8::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); c.prefetch(); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH8::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ r = __bscf(mask); c = node->child(r); c.prefetch(); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! fallback case if more than 4 children are hit */ while (1) { r = __bscf(mask); assert(stackPtr < stackEnd); c = node->child(r); c.prefetch(); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! this is a leaf node */ assert(cur != BVH8::emptyNode); STAT3(normal.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); size_t lazy_node = 0; PrimitiveIntersector::intersect(pre,ray,prim,num,bvh->scene,lazy_node); ray_far = ray.tfar; if (unlikely(lazy_node)) { stackPtr->ptr = lazy_node; stackPtr->dist = inf; stackPtr++; } } AVX_ZERO_UPPER(); }
void BVH8Intersector1<robust,PrimitiveIntersector>::occluded(const BVH8* bvh, Ray& ray) { /*! perform per ray precalculations required by the primitive intersector */ Precalculations pre(ray,bvh); /*! stack state */ NodeRef stack[stackSize]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSize; stack[0] = bvh->root; /* filter out invalid rays */ #if defined(RTCORE_IGNORE_INVALID_RAYS) if (!ray.valid()) return; #endif /* verify correct input */ assert(ray.tnear > -FLT_MIN); //assert(!(types & BVH4::FLAG_NODE_MB) || (ray.time >= 0.0f && ray.time <= 1.0f)); /*! load the ray into SIMD registers */ const Vec3f8 norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const Vec3f8 rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const Vec3f8 org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const float8 ray_near(ray.tnear); float8 ray_far(ray.tfar); /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_rdir.x >= 0 ? 0*sizeof(float8) : 1*sizeof(float8); const size_t nearY = ray_rdir.y >= 0 ? 2*sizeof(float8) : 3*sizeof(float8); const size_t nearZ = ray_rdir.z >= 0 ? 4*sizeof(float8) : 5*sizeof(float8); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ sizeof(float8), farY = nearY ^ sizeof(float8), farZ = nearZ ^ sizeof(float8); #if defined (__AVX2__) const float8 tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const float8 tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const float8 tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const float8 tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const float8 tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const float8 tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const float8 tNearX = (norg.x + load8f((const char*)node+nearX)) * rdir.x; const float8 tNearY = (norg.y + load8f((const char*)node+nearY)) * rdir.y; const float8 tNearZ = (norg.z + load8f((const char*)node+nearZ)) * rdir.z; const float8 tFarX = (norg.x + load8f((const char*)node+farX )) * rdir.x; const float8 tFarY = (norg.y + load8f((const char*)node+farY )) * rdir.y; const float8 tFarZ = (norg.z + load8f((const char*)node+farZ )) * rdir.z; #endif #if defined(__AVX2__) const float8 tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const float8 tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const bool8 vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xff; #else const float8 tNear = max(tNearX,tNearY,tNearZ,ray_near); const float8 tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const bool8 vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); cur.prefetch(); assert(cur != BVH8::emptyNode); continue; } /*! two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); c0.prefetch(); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); c1.prefetch(); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH8::emptyNode); assert(c1 != BVH8::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); cur.prefetch(); *stackPtr = cur; stackPtr++; if (likely(mask == 0)) { stackPtr--; continue; } /*! process more than three children */ while(1) { r = __bscf(mask); NodeRef c = node->child(r); c.prefetch(); *stackPtr = c; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1]; stackPtr--; } /*! this is a leaf node */ assert(cur != BVH8::emptyNode); STAT3(shadow.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); size_t lazy_node = 0; if (PrimitiveIntersector::occluded(pre,ray,prim,num,bvh->scene,lazy_node)) { ray.geomID = 0; break; } if (unlikely(lazy_node)) { *stackPtr = (NodeRef)lazy_node; stackPtr++; } } AVX_ZERO_UPPER(); }
void BVH4Intersector1<types,robust,PrimitiveIntersector>::intersect(const BVH4* bvh, Ray& ray) { /*! perform per ray precalculations required by the primitive intersector */ Precalculations pre(ray); BVH4::UnalignedNodeMB::Precalculations pre1(ray); /*! stack state */ StackItemInt32<NodeRef> stack[stackSize]; //!< stack of nodes StackItemInt32<NodeRef>* stackPtr = stack+1; //!< current stack pointer StackItemInt32<NodeRef>* stackEnd = stack+stackSize; stack[0].ptr = bvh->root; stack[0].dist = neg_inf; /*! load the ray into SIMD registers */ const Vec3fa ray_rdir = rcp_safe(ray.dir); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const sse3f org(ray.org.x,ray.org.y,ray.org.z); const sse3f dir(ray.dir.x,ray.dir.y,ray.dir.z); const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const ssef ray_near(ray.tnear); ssef ray_far(ray.tfar); /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_rdir.x >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray_rdir.y >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(*(float*)&stackPtr->dist > ray.tfar)) continue; /* downtraversal loop */ while (true) { size_t mask; ssef tNear; /*! stop if we found a leaf node */ if (unlikely(cur.isLeaf(types))) break; STAT3(normal.trav_nodes,1,1,1); /* process standard nodes */ if (likely(cur.isNode(types))) mask = cur.node()->intersect<robust>(nearX,nearY,nearZ,org,rdir,org_rdir,ray_near,ray_far,tNear); /* process motion blur nodes */ else if (likely(cur.isNodeMB(types))) mask = cur.nodeMB()->intersect(nearX,nearY,nearZ,org,rdir,org_rdir,ray_near,ray_far,ray.time,tNear); /*! process nodes with unaligned bounds */ else if (unlikely(cur.isUnalignedNode(types))) mask = cur.unalignedNode()->intersect(org,dir,ray_near,ray_far,tNear); /*! process nodes with unaligned bounds and motion blur */ else if (unlikely(cur.isUnalignedNodeMB(types))) mask = cur.unalignedNodeMB()->intersect(pre1,org,dir,ray_near,ray_far,ray.time,tNear); /*! if no child is hit, pop next node */ const BVH4::BaseNode* node = cur.baseNode(types); if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); cur.prefetch(types); assert(cur != BVH4::emptyNode); continue; } /*! two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); c0.prefetch(types); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); c1.prefetch(types); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); c.prefetch(types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); c = node->child(r); c.prefetch(types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! this is a leaf node */ assert(cur != BVH4::emptyNode); STAT3(normal.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); PrimitiveIntersector::intersect(pre,ray,prim,num,bvh->geometry); ray_far = ray.tfar; } AVX_ZERO_UPPER(); }
void BVH4Intersector1<types,robust,PrimitiveIntersector>::occluded(const BVH4* bvh, Ray& ray) { /*! perform per ray precalculations required by the primitive intersector */ Precalculations pre(ray); BVH4::UnalignedNodeMB::Precalculations pre1(ray); /*! stack state */ NodeRef stack[stackSize]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSize; stack[0] = bvh->root; /*! load the ray into SIMD registers */ const Vec3fa ray_rdir = rcp_safe(ray.dir); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const sse3f org(ray.org.x,ray.org.y,ray.org.z); const sse3f dir(ray.dir.x,ray.dir.y,ray.dir.z); const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const ssef ray_near(ray.tnear); ssef ray_far(ray.tfar); /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_rdir.x >= 0 ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray_rdir.y >= 0 ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray_rdir.z >= 0 ? 4*sizeof(ssef) : 5*sizeof(ssef); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { size_t mask; ssef tNear; /*! stop if we found a leaf node */ if (unlikely(cur.isLeaf(types))) break; STAT3(shadow.trav_nodes,1,1,1); /* process standard nodes */ if (likely(cur.isNode(types))) mask = cur.node()->intersect<robust>(nearX,nearY,nearZ,org,rdir,org_rdir,ray_near,ray_far,tNear); /* process motion blur nodes */ else if (likely(cur.isNodeMB(types))) mask = cur.nodeMB()->intersect(nearX,nearY,nearZ,org,rdir,org_rdir,ray_near,ray_far,ray.time,tNear); /*! process nodes with unaligned bounds */ else if (unlikely(cur.isUnalignedNode(types))) mask = cur.unalignedNode()->intersect(org,dir,ray_near,ray_far,tNear); /*! process nodes with unaligned bounds and motion blur */ else if (unlikely(cur.isUnalignedNodeMB(types))) mask = cur.unalignedNodeMB()->intersect(pre1,org,dir,ray_near,ray_far,ray.time,tNear); /*! if no child is hit, pop next node */ const BVH4::BaseNode* node = cur.baseNode(types); if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); cur.prefetch(types); assert(cur != BVH4::emptyNode); continue; } /*! two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); c0.prefetch(types); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); c1.prefetch(types); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); cur.prefetch(types); assert(cur != BVH4::emptyNode); if (likely(mask == 0)) continue; assert(stackPtr < stackEnd); *stackPtr = cur; stackPtr++; /*! four children are hit */ cur = node->child(3); cur.prefetch(types); assert(cur != BVH4::emptyNode); } /*! this is a leaf node */ assert(cur != BVH4::emptyNode); STAT3(shadow.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); if (PrimitiveIntersector::occluded(pre,ray,prim,num,bvh->geometry)) { ray.geomID = 0; break; } } AVX_ZERO_UPPER(); }