/*! Occlusion query for a packet of four rays that traverses the BVH4 one
 *  ray at a time through occluded1().
 *
 *  \param valid_i  per-lane mask selecting which rays of the packet to test
 *  \param bvh      hierarchy to traverse; traversal starts at bvh->root
 *  \param ray      ray packet; store4i() is called on ray.geomID with value 0
 *                  and a mask of the valid lanes for which occluded1()
 *                  returned true
 */
void BVH4Intersector4Single<types,robust,PrimitiveIntersector4>::occluded(sseb* valid_i, BVH4* bvh, Ray4& ray)
{
  /* load ray */
  const sseb valid = *valid_i;
  sseb terminated = !valid;
  sse3f ray_org = ray.org, ray_dir = ray.dir;
  ssef ray_tnear = ray.tnear, ray_tfar = ray.tfar;
  const sse3f rdir = rcp_safe(ray_dir);

  /* inactive lanes get the empty interval [+inf,-inf] */
  ray_tnear = select(valid,ray_tnear,ssef(pos_inf));
  ray_tfar  = select(valid,ray_tfar ,ssef(neg_inf));
  Precalculations pre(valid,ray);

  /* compute near/far per ray: per-axis byte offsets (multiples of
     sizeof(ssef)) chosen from the sign of the reciprocal direction */
  sse3i nearXYZ;
  nearXYZ.x = select(rdir.x >= 0.0f,ssei(0*(int)sizeof(ssef)),ssei(1*(int)sizeof(ssef)));
  nearXYZ.y = select(rdir.y >= 0.0f,ssei(2*(int)sizeof(ssef)),ssei(3*(int)sizeof(ssef)));
  nearXYZ.z = select(rdir.z >= 0.0f,ssei(4*(int)sizeof(ssef)),ssei(5*(int)sizeof(ssef)));

  /* we have no packet implementation for OBB nodes yet */
  size_t bits = movemask(valid);
  while (bits != 0)
  {
    /* only scan for a set bit while one is known to exist */
    const size_t i = __bsf(bits);
    bits = __btc(bits,i);
    if (occluded1(bvh,bvh->root,i,pre,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar,nearXYZ))
      terminated[i] = -1;
  }

  store4i(valid & terminated,&ray.geomID,0);
  AVX_ZERO_UPPER();
}
/*! Closest-hit query for a packet of eight rays that traverses the BVH4 one
 *  ray at a time through intersect1().
 *
 *  \param valid_i  per-lane mask selecting which rays of the packet to trace
 *  \param bvh      hierarchy to traverse; traversal starts at bvh->root
 *  \param ray      ray packet handed to intersect1() for every active lane
 */
void BVH4Intersector8Single<types,robust,PrimitiveIntersector8>::intersect(avxb* valid_i, BVH4* bvh, Ray8& ray)
{
  /* load ray */
  const avxb valid0 = *valid_i;
  avx3f ray_org = ray.org;
  avx3f ray_dir = ray.dir;
  avxf ray_tnear = ray.tnear, ray_tfar = ray.tfar;
  const avx3f rdir = rcp_safe(ray_dir);

  /* inactive lanes get the empty interval [+inf,-inf] */
  ray_tnear = select(valid0,ray_tnear,avxf(pos_inf));
  ray_tfar  = select(valid0,ray_tfar ,avxf(neg_inf));
  Precalculations pre(valid0,ray);

  /* compute near/far per ray: per-axis byte offsets (multiples of
     sizeof(ssef), as written in the original) chosen from the sign of the
     reciprocal direction */
  avx3i nearXYZ;
  nearXYZ.x = select(rdir.x >= 0.0f,avxi(0*(int)sizeof(ssef)),avxi(1*(int)sizeof(ssef)));
  nearXYZ.y = select(rdir.y >= 0.0f,avxi(2*(int)sizeof(ssef)),avxi(3*(int)sizeof(ssef)));
  nearXYZ.z = select(rdir.z >= 0.0f,avxi(4*(int)sizeof(ssef)),avxi(5*(int)sizeof(ssef)));

  /* we have no packet implementation for OBB nodes yet */
  size_t bits = movemask(valid0);
  while (bits != 0)
  {
    /* only scan for a set bit while one is known to exist */
    const size_t i = __bsf(bits);
    bits = __btc(bits,i);
    intersect1(bvh, bvh->root, i, pre, ray, ray_org, ray_dir, rdir, ray_tnear, ray_tfar, nearXYZ);
  }

  AVX_ZERO_UPPER();
}
/*! Packet front end built on a single-ray intersector: the packet is
 *  unpacked into four scalar rays, Intersector1::intersect() is invoked for
 *  each lane set in *valid_i, and the scalar rays are written back.
 *
 *  \param valid_i  per-lane mask of rays to trace
 *  \param bvh      hierarchy forwarded to Intersector1::intersect()
 *  \param ray      ray packet, read with get() and updated with set()
 */
void BVH4Intersector4FromIntersector1<Intersector1>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray)
{
  /* unpack the packet */
  Ray rays[4];
  ray.get(rays);

  /* walk the set bits of the validity mask, lowest first */
  size_t pending = movemask(*valid_i);
  size_t lane = __bsf(pending);
  while (pending != 0)
  {
    Intersector1::intersect(bvh,rays[lane]);
    pending = __btc(pending,lane);
    lane = __bsf(pending);
  }

  /* repack the packet */
  ray.set(rays);
  AVX_ZERO_UPPER();
}
/*! Fills the interval-arithmetic fields of a ray packet (iasign, iaMinrDir,
 *  iaMaxrDir) from the four image-plane corners of the packet's pixel block.
 *
 *  \param pckt  packet whose width/height are read and whose ia* fields are
 *               written
 *  \param x     x pixel coordinate used for the first corner
 *  \param y     y pixel coordinate used for the first corner
 *  \return      true when movemask() of (lower bound XOR upper bound) is
 *               zero, i.e. no lane's two bounds differ in the bit movemask
 *               extracts (presumably the sign bit)
 */
INLINE bool RTCameraPacketGen::generateIA(RayPacket &pckt, int x, int y) const
{
  /* pixel origin and block extent, broadcast */
  const ssef originX = (float) x;
  const ssef originY = (float) y;
  const ssef extentX = (float) pckt.width;
  const ssef extentY = (float) pckt.height;

  /* the four corners of the block on the image plane */
  const ssef cornerBL = aImagePlaneOrg + originX * axAxis + originY * azAxis;
  const ssef cornerBR = cornerBL + extentX * axAxis;
  const ssef cornerTR = cornerBR + extentY * azAxis;
  const ssef cornerTL = cornerBL + extentY * azAxis;

  /* component-wise bounds over the corners */
  const ssef dirLo = fixup(min(min(cornerBL, cornerBR), min(cornerTL, cornerTR)));
  const ssef dirHi = fixup(max(max(cornerBL, cornerBR), max(cornerTL, cornerTR)));

  /* reciprocals; z is replicated into w to avoid issues with unused w */
  const ssef rcpOfHi = rcp(dirHi).xyzz();
  const ssef rcpOfLo = rcp(dirLo).xyzz();

  /* per-lane mask derived from the reciprocal of the upper bound */
  const size_t signBits = movemask(rcpOfHi);
  const sseb signMask = unmovemask(signBits);

  pckt.iasign    = signMask;
  pckt.iaMinrDir = select(signMask, -rcpOfLo, rcpOfHi);
  pckt.iaMaxrDir = select(signMask, -rcpOfHi, rcpOfLo);

  return movemask(dirLo ^ dirHi) == 0;
}
/* ray/box intersection
 *
 * Slab test of a ray packet against one box of a BVH2 node.  The two planes
 * per axis are read from lower_upper_{x,y,z}[i] and lower_upper_{x,y,z}[i+2].
 * Returns movemask(entry <= exit): one bit per lane whose clipped entry
 * distance does not exceed its clipped exit distance. */
__forceinline size_t intersectBox(const avx3f& org, const avx3f& rdir, const avxf& tnear, const avxf& tfar, const BVH2::Node* node, const int i)
{
  /* distances to the planes stored at slot i */
  const avxf tx0 = (avxf(node->lower_upper_x[i+0]) - org.x) * rdir.x;
  const avxf ty0 = (avxf(node->lower_upper_y[i+0]) - org.y) * rdir.y;
  const avxf tz0 = (avxf(node->lower_upper_z[i+0]) - org.z) * rdir.z;

  /* distances to the planes stored at slot i+2 */
  const avxf tx1 = (avxf(node->lower_upper_x[i+2]) - org.x) * rdir.x;
  const avxf ty1 = (avxf(node->lower_upper_y[i+2]) - org.y) * rdir.y;
  const avxf tz1 = (avxf(node->lower_upper_z[i+2]) - org.z) * rdir.z;

  /* latest entry and earliest exit, clipped to [tnear,tfar] */
  const avxf tEnter = max(min(tx0,tx1),min(ty0,ty1),min(tz0,tz1),tnear);
  const avxf tExit  = min(max(tx0,tx1),max(ty0,ty1),max(tz0,tz1),tfar );

  return movemask(tEnter <= tExit);
}
/*! Single-ray closest-hit traversal of a motion-blur BVH4.
 *
 *  The top of the stack is kept "pre-popped" in popCur/popDist.  Node bounds
 *  are evaluated at ray.time as value + time*delta, where value and delta are
 *  the two consecutive ssef entries read at each bound's offset.
 *
 *  \param bvh  hierarchy to traverse (bvh->root, bvh->geometry are read)
 *  \param ray  ray to trace; TriangleIntersector::intersect() receives it for
 *              every triangle of every visited leaf, and ray.tfar is re-read
 *              after each leaf to tighten the traversal interval
 */
void BVH4MBIntersector1<TriangleIntersector>::intersect(const BVH4MB* bvh, Ray& ray)
{
  AVX_ZERO_UPPER();
  STAT3(normal.travs,1,1,1);

  /*! stack state */
  Base* popCur = bvh->root;   //!< pre-popped top node from the stack
  float popDist = neg_inf;    //!< pre-popped distance of top node from the stack

  /* The pre-pop below reads stackPtr[-1] even when stackPtr == stack (the
     value is then discarded by the termination test).  One sentinel element
     is reserved below the logical stack base so that this read stays inside
     the array. */
  StackItem stackStorage[2+3*BVH4MB::maxDepth];
  stackStorage[0].ptr  = popCur;
  stackStorage[0].dist = popDist;
  StackItem* const stack = stackStorage+1;  //!< stack of nodes that still need to get traversed
  StackItem* stackPtr = stack+1;            //!< current stack pointer

  /*! offsets to select the side that becomes the lower or upper bound;
      each bound occupies two ssef (value and time delta) */
  const size_t nearX = ray.dir.x >= 0 ? 0*2*sizeof(ssef) : 1*2*sizeof(ssef);
  const size_t nearY = ray.dir.y >= 0 ? 2*2*sizeof(ssef) : 3*2*sizeof(ssef);
  const size_t nearZ = ray.dir.z >= 0 ? 4*2*sizeof(ssef) : 5*2*sizeof(ssef);
  const size_t farX  = nearX ^ (2*sizeof(ssef));
  const size_t farY  = nearY ^ (2*sizeof(ssef));
  const size_t farZ  = nearZ ^ (2*sizeof(ssef));

  /*! load the ray into SIMD registers */
  const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const Vec3fa ray_rdir = rcp_safe(ray.dir);
  const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z);
  const ssef rayNear(ray.tnear);
  ssef rayFar(ray.tfar);

  while (true)
  {
    /*! pop next node */
    if (unlikely(stackPtr == stack)) break;
    stackPtr--;
    Base* cur = popCur;

    /*! if popped node is too far, pop next one */
    if (unlikely(popDist > ray.tfar)) {
      popCur  = (Base*)stackPtr[-1].ptr;
      popDist = stackPtr[-1].dist;
      continue;
    }

  next:

    /*! we mostly go into the inner node case */
    if (likely(cur->isNode()))
    {
      STAT3(normal.trav_nodes,1,1,1);

      /*! single ray intersection with 4 boxes */
      const Node* node = cur->node();
      const ssef* pNearX = (const ssef*)((const char*)node+nearX);
      const ssef* pNearY = (const ssef*)((const char*)node+nearY);
      const ssef* pNearZ = (const ssef*)((const char*)node+nearZ);
      const ssef tNearX = (norg.x + ssef(pNearX[0]) + ray.time*pNearX[1]) * rdir.x;
      const ssef tNearY = (norg.y + ssef(pNearY[0]) + ray.time*pNearY[1]) * rdir.y;
      const ssef tNearZ = (norg.z + ssef(pNearZ[0]) + ray.time*pNearZ[1]) * rdir.z;
      const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear);
      const ssef* pFarX = (const ssef*)((const char*)node+farX);
      const ssef* pFarY = (const ssef*)((const char*)node+farY);
      const ssef* pFarZ = (const ssef*)((const char*)node+farZ);
      const ssef tFarX = (norg.x + ssef(pFarX[0]) + ray.time*pFarX[1]) * rdir.x;
      const ssef tFarY = (norg.y + ssef(pFarY[0]) + ray.time*pFarY[1]) * rdir.y;
      const ssef tFarZ = (norg.z + ssef(pFarZ[0]) + ray.time*pFarZ[1]) * rdir.z;
      popCur  = (Base*) stackPtr[-1].ptr;  //!< pre-pop of topmost stack item
      popDist = stackPtr[-1].dist;         //!< pre-pop of distance of topmost stack item
      const ssef tFar = min(tFarX,tFarY,tFarZ,rayFar);
      size_t _hit = movemask(tNear <= tFar);

      /*! if no child is hit, pop next node */
      if (unlikely(_hit == 0))
        continue;

      /*! one child is hit, continue with that child */
      size_t r = __bsf(_hit); _hit = __btc(_hit,r);
      if (likely(_hit == 0)) {
        cur = node->child[r];
        goto next;
      }

      /*! two children are hit, push far child, and continue with closer child */
      Base* c0 = node->child[r]; const float d0 = tNear[r];
      r = __bsf(_hit); _hit = __btc(_hit,r);
      Base* c1 = node->child[r]; const float d1 = tNear[r];
      if (likely(_hit == 0)) {
        if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; goto next; }
        else         { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; goto next; }
      }

      /*! Here starts the slow path for 3 or 4 hit children. We push
       *  all nodes onto the stack to sort them there. */
      stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
      stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;

      /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
      r = __bsf(_hit); _hit = __btc(_hit,r);
      Base* c = node->child[r]; float d = tNear[r];
      stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
      if (likely(_hit == 0)) {
        sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
        cur = (Base*) stackPtr[-1].ptr; stackPtr--;
        goto next;
      }

      /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
      r = __bsf(_hit); _hit = __btc(_hit,r);
      c = node->child[r]; d = tNear[r];
      stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
      sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
      cur = (Base*) stackPtr[-1].ptr; stackPtr--;
      goto next;
    }

    /*! this is a leaf node */
    else
    {
      STAT3(normal.trav_leaves,1,1,1);
      size_t num; Triangle* tri = (Triangle*) cur->leaf(num);
      for (size_t i=0; i<num; i++)
        TriangleIntersector::intersect(ray,tri[i],bvh->geometry);

      popCur  = (Base*) stackPtr[-1].ptr;  //!< pre-pop of topmost stack item
      popDist = stackPtr[-1].dist;         //!< pre-pop of distance of topmost stack item

      /* pick up a possibly shortened ray for subsequent box tests */
      rayFar = ray.tfar;
    }
  }
  AVX_ZERO_UPPER();
}
__forceinline bool BVH8iIntersector8Hybrid<TriangleIntersector8>::occluded1(const BVH8i* bvh, NodeRef root, const size_t k, Ray8& ray,const avx3f &ray_org, const avx3f &ray_dir, const avx3f &ray_rdir, const avxf &ray_tnear, const avxf &ray_tfar, const avx3i& nearXYZ) { /*! stack state */ NodeRef stack[stackSizeSingle]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSizeSingle; stack[0] = root; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = nearXYZ.x[k]; const size_t nearY = nearXYZ.y[k]; const size_t nearZ = nearXYZ.z[k]; /*! load the ray into SIMD registers */ const avx3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const avx3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const avx3f norg = -org, org_rdir(org*rdir); const avxf rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr(); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = (Node*)cur.node(nodes); const size_t farX = nearX ^ sizeof(avxf), farY = nearY ^ sizeof(avxf), farZ = nearZ ^ sizeof(avxf); #if defined (__AVX2__) const avxf tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const avxf tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const avxf tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const avxf tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const avxf tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const avxf tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const avxf tNearX = (norg.x + load8f((const char*)node+nearX)) * rdir.x; const avxf tNearY = (norg.y + load8f((const char*)node+nearY)) * rdir.y; const avxf tNearZ = (norg.z + load8f((const char*)node+nearZ)) * rdir.z; const avxf tFarX = (norg.x + load8f((const char*)node+farX )) * rdir.x; const avxf tFarY = (norg.y + load8f((const char*)node+farY )) * rdir.y; const avxf tFarZ = (norg.z + load8f((const char*)node+farZ )) * rdir.z; #endif #if defined(__AVX2__) const avxf tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const avxf tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const avxb vmask = cast(tNear) > cast(tFar); unsigned int mask = movemask(vmask)^0xff; #else const avxf tNear = max(tNearX,tNearY,tNearZ,rayNear); const avxf tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const avxb vmask = tNear <= tFar; unsigned int mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4i::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4i::emptyNode); assert(c1 != BVH4i::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); assert(cur != BVH4i::emptyNode); if (likely(mask == 0)) continue; while(1) { r = __bscf(mask); NodeRef c = node->child(r); *stackPtr = c; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1]; stackPtr--; // assert(stackPtr < stackEnd); // *stackPtr = cur; stackPtr++; // /*! four children are hit */ // cur = node->child(3); // assert(cur != BVH4i::emptyNode); } /*! this is a leaf node */ STAT3(shadow.trav_leaves,1,1,1); size_t num; Triangle* prim = (Triangle*) cur.leaf(accel,num); if (TriangleIntersector8::occluded(ray,k,prim,num,bvh->geometry)) { ray.geomID[k] = 0; return true; } } return false; }
/*! Hybrid packet/single-ray closest-hit traversal of a BVH4 for four rays.
 *
 *  \param valid_i  per-lane mask of rays to trace (additionally masked by
 *                  ray.valid() when RTCORE_IGNORE_INVALID_RAYS is defined)
 *  \param bvh      hierarchy to traverse; bvh->root is pushed as first entry
 *  \param ray      ray packet
 *
 *  Two parallel stacks hold node references and per-lane entry distances;
 *  stack slot 0 holds BVH4::invalidNode as a terminator.  After each pop the
 *  lanes with curDist < ray_tfar are counted: when at most SWITCH_THRESHOLD
 *  are active (and the platform guard allows it), each active lane is traced
 *  by BVH4Intersector4Single::intersect1() from the popped node and ray_tfar
 *  is refreshed from ray.tfar.  Otherwise the inner loop tests the children
 *  of regular nodes (types & 0x1) or motion-blur nodes (types & 0x10) with
 *  intersect_node(); a hit child either becomes the current node (when any
 *  lane is closer than the current candidate) or is pushed.
 *
 *  NOTE(review): the visible text of this definition ends right after the
 *  inner node-processing loop; leaf handling and the closing braces are not
 *  part of the lines shown here. */
void BVH4Intersector4Hybrid<types,robust,PrimitiveIntersector4>::intersect(bool4* valid_i, BVH4* bvh, Ray4& ray) { /* verify correct input */ bool4 valid0 = *valid_i; #if defined(RTCORE_IGNORE_INVALID_RAYS) valid0 &= ray.valid(); #endif assert(all(valid0,ray.tnear > -FLT_MIN)); assert(!(types & BVH4::FLAG_NODE_MB) || all(valid0,ray.time >= 0.0f & ray.time <= 1.0f)); /* load ray */ Vec3f4 ray_org = ray.org; Vec3f4 ray_dir = ray.dir; float4 ray_tnear = ray.tnear, ray_tfar = ray.tfar; const Vec3f4 rdir = rcp_safe(ray_dir); const Vec3f4 org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid0,ray_tnear,float4(pos_inf)); ray_tfar = select(valid0,ray_tfar ,float4(neg_inf)); const float4 inf = float4(pos_inf); Precalculations pre(valid0,ray); /* compute near/far per ray */ Vec3i4 nearXYZ; nearXYZ.x = select(rdir.x >= 0.0f,int4(0*(int)sizeof(float4)),int4(1*(int)sizeof(float4))); nearXYZ.y = select(rdir.y >= 0.0f,int4(2*(int)sizeof(float4)),int4(3*(int)sizeof(float4))); nearXYZ.z = select(rdir.z >= 0.0f,int4(4*(int)sizeof(float4)),int4(5*(int)sizeof(float4))); /* allocate stack and push root node */ float4 stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; float4* __restrict__ sptr_near = stack_near + 2; while (1) pop: { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ float4 curDist = *sptr_near; const bool4 active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t 
i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { BVH4Intersector4Single<types,robust,PrimitiveIntersector4>::intersect1(bvh, cur, i, pre, ray, ray_org, ray_dir, rdir, ray_tnear, ray_tfar, nearXYZ); } ray_tfar = min(ray_tfar,ray.tfar); continue; } #endif while (1) { /* process normal nodes */ if (likely((types & 0x1) && cur.isNode())) { const bool4 valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),4); const Node* __restrict__ const node = cur.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; float4 lnearP; const bool4 lhit = intersect_node<robust>(node,i,org,rdir,org_rdir,ray_tnear,ray_tfar,lnearP); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4::emptyNode); const float4 childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } #if SWITCH_DURING_DOWN_TRAVERSAL == 1 // seems to be the best place for testing utilization if (unlikely(popcnt(ray_tfar > curDist) <= SWITCH_THRESHOLD)) { *sptr_node++ = cur; *sptr_near++ = curDist; goto pop; } #endif } /* process motion blur nodes */ else if (likely((types & 0x10) && cur.isNodeMB())) { const bool4 valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),4); const BVH4::NodeMB* __restrict__ const node = cur.nodeMB(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = 
*sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->child(i); if (unlikely(child == BVH4::emptyNode)) break; float4 lnearP; const bool4 lhit = intersect_node(node,i,org,rdir,org_rdir,ray_tnear,ray_tfar,ray.time,lnearP); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4::emptyNode); const float4 childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } #if SWITCH_DURING_DOWN_TRAVERSAL == 1 // seems to be the best place for testing utilization if (unlikely(popcnt(ray_tfar > curDist) <= SWITCH_THRESHOLD)) { *sptr_node++ = cur; *sptr_near++ = curDist; goto pop; } #endif } else break; }
/*! Single-ray any-hit traversal of an index-based BVH4.
 *
 *  \param This  intersector instance; only This->bvh is read
 *  \param ray   ray to test
 *  \return      true as soon as TriangleIntersector::occluded() accepts one
 *               triangle of a visited leaf, false when the stack runs empty
 */
bool BVH4iIntersector1<TriangleIntersector>::occluded(const BVH4iIntersector1* This, Ray& ray)
{
  AVX_ZERO_UPPER();
  STAT3(shadow.travs,1,1,1);

  const BVH4i* bvh = This->bvh;

  /* nodes still to visit; slot 0 is seeded with the root */
  NodeRef todo[1+3*BVH4i::maxDepth];
  NodeRef* top = todo+1;
  todo[0] = bvh->root;

  /* byte offsets of the bound entered first on each axis, and of its
     opposite (the original toggles with XOR 16) */
  const size_t nearX = ray.dir.x >= 0 ? 0*sizeof(ssef_m) : 1*sizeof(ssef_m);
  const size_t nearY = ray.dir.y >= 0 ? 2*sizeof(ssef_m) : 3*sizeof(ssef_m);
  const size_t nearZ = ray.dir.z >= 0 ? 4*sizeof(ssef_m) : 5*sizeof(ssef_m);
  const size_t farX = nearX ^ 16;
  const size_t farY = nearY ^ 16;
  const size_t farZ = nearZ ^ 16;

  /* broadcast the ray */
  const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const Vector3f ray_rdir = rcp_safe(ray.dir);
  const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z);
  const Vector3f ray_org_rdir = ray.org*ray_rdir;
  const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
  const ssef rayNear(ray.tnear);
  const ssef rayFar(ray.tfar);

  const void* nodePtr = bvh->nodePtr();
  const void* triPtr  = bvh->triPtr();

  /* outer loop: take the next pending node */
  while (true) pop:
  {
    if (unlikely(top == todo)) break;
    top--;
    NodeRef cur = (NodeRef) *top;

    /* inner loop: descend until a leaf is reached */
    while (true)
    {
      if (unlikely(cur.isLeaf())) break;
      STAT3(shadow.trav_nodes,1,1,1);

      /* slab test of the ray against the four child boxes */
      const Node* node = cur.node(nodePtr);
      const char* const boxes = (const char*)nodePtr+(size_t)cur;
#if defined (__AVX2__)
      const ssef tNearX = msub(ssef(boxes+nearX), rdir.x, org_rdir.x);
      const ssef tNearY = msub(ssef(boxes+nearY), rdir.y, org_rdir.y);
      const ssef tNearZ = msub(ssef(boxes+nearZ), rdir.z, org_rdir.z);
      const ssef tFarX  = msub(ssef(boxes+farX ), rdir.x, org_rdir.x);
      const ssef tFarY  = msub(ssef(boxes+farY ), rdir.y, org_rdir.y);
      const ssef tFarZ  = msub(ssef(boxes+farZ ), rdir.z, org_rdir.z);
#else
      const ssef tNearX = (norg.x + ssef(boxes+nearX)) * rdir.x;
      const ssef tNearY = (norg.y + ssef(boxes+nearY)) * rdir.y;
      const ssef tNearZ = (norg.z + ssef(boxes+nearZ)) * rdir.z;
      const ssef tFarX  = (norg.x + ssef(boxes+farX )) * rdir.x;
      const ssef tFarY  = (norg.y + ssef(boxes+farY )) * rdir.y;
      const ssef tFarZ  = (norg.z + ssef(boxes+farZ )) * rdir.z;
#endif
      const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear);
      const ssef tFar  = min(tFarX ,tFarY ,tFarZ ,rayFar);
      size_t hits = movemask(tNear <= tFar);

      /* nothing hit: back to the stack */
      if (unlikely(hits == 0))
        goto pop;

      /* exactly one hit: descend into it */
      size_t idx = __bsf(hits);
      hits = __btc(hits,idx);
      if (likely(hits == 0)) {
        cur = node->child(idx);
        continue;
      }

      /* two hits: descend into the nearer one, defer the other */
      NodeRef first = node->child(idx);
      const float firstDist = tNear[idx];
      idx = __bsf(hits);
      hits = __btc(hits,idx);
      NodeRef second = node->child(idx);
      const float secondDist = tNear[idx];
      if (likely(hits == 0))
      {
        if (firstDist < secondDist) {
          *top = second; top++;
          cur = first;
        } else {
          *top = first; top++;
          cur = second;
        }
        continue;
      }

      /* three or four hits: defer the first two unconditionally */
      *top = first;  top++;
      *top = second; top++;

      /* third hit: stored, then immediately taken back when it is the last */
      idx = __bsf(hits);
      hits = __btc(hits,idx);
      cur = node->child(idx);
      *top = cur; top++;
      if (likely(hits == 0)) {
        top--;
        continue;
      }

      /* all four hit: the third stays deferred, go on with child 3 */
      cur = node->child(3);
    }

    /* leaf: any accepted triangle ends the query */
    STAT3(shadow.trav_leaves,1,1,1);
    size_t num;
    Triangle* tri = (Triangle*) cur.leaf(triPtr,num);
    for (size_t i=0; i<num; i++)
    {
      if (TriangleIntersector::occluded(ray,tri[i],bvh->vertices)) {
        AVX_ZERO_UPPER();
        return true;
      }
    }
  }

  AVX_ZERO_UPPER();
  return false;
}
void BVH8Intersector1<robust,PrimitiveIntersector>::intersect(const BVH8* bvh, Ray& ray) { /*! perform per ray precalculations required by the primitive intersector */ Precalculations pre(ray,bvh); /*! stack state */ StackItemT<NodeRef> stack[stackSize]; //!< stack of nodes StackItemT<NodeRef>* stackPtr = stack+1; //!< current stack pointer StackItemT<NodeRef>* stackEnd = stack+stackSize; stack[0].ptr = bvh->root; stack[0].dist = neg_inf; /* filter out invalid rays */ #if defined(RTCORE_IGNORE_INVALID_RAYS) if (!ray.valid()) return; #endif /* verify correct input */ assert(ray.tnear > -FLT_MIN); //assert(!(types & BVH4::FLAG_NODE_MB) || (ray.time >= 0.0f && ray.time <= 1.0f)); /*! load the ray into SIMD registers */ const Vec3f8 norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const Vec3f8 rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const Vec3f8 org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const float8 ray_near(ray.tnear); float8 ray_far(ray.tfar); /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_rdir.x >= 0.0f ? 0*sizeof(float8) : 1*sizeof(float8); const size_t nearY = ray_rdir.y >= 0.0f ? 2*sizeof(float8) : 3*sizeof(float8); const size_t nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(float8) : 5*sizeof(float8); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(*(float*)&stackPtr->dist > ray.tfar)) continue; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ sizeof(float8), farY = nearY ^ sizeof(float8), farZ = nearZ ^ sizeof(float8); #if defined (__AVX2__) const float8 tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const float8 tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const float8 tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const float8 tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const float8 tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const float8 tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const float8 tNearX = (norg.x + load8f((const char*)node+nearX)) * rdir.x; const float8 tNearY = (norg.y + load8f((const char*)node+nearY)) * rdir.y; const float8 tNearZ = (norg.z + load8f((const char*)node+nearZ)) * rdir.z; const float8 tFarX = (norg.x + load8f((const char*)node+farX )) * rdir.x; const float8 tFarY = (norg.y + load8f((const char*)node+farY )) * rdir.y; const float8 tFarZ = (norg.z + load8f((const char*)node+farZ )) * rdir.z; #endif const float round_down = 1.0f-2.0f*float(ulp); const float round_up = 1.0f+2.0f*float(ulp); #if defined(__AVX2__) const float8 tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const float8 tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const bool8 vmask = robust ? (round_down*tNear > round_up*tFar) : cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xff; #else const float8 tNear = max(tNearX,tNearY,tNearZ,ray_near); const float8 tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const bool8 vmask = robust ? (round_down*tNear > round_up*tFar) : tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! 
one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); cur.prefetch(); assert(cur != BVH8::emptyNode); continue; } /*! two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); c0.prefetch(); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); c1.prefetch(); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH8::emptyNode); assert(c1 != BVH8::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); c.prefetch(); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH8::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ r = __bscf(mask); c = node->child(r); c.prefetch(); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! 
fallback case if more than 4 children are hit */ while (1) { r = __bscf(mask); assert(stackPtr < stackEnd); c = node->child(r); c.prefetch(); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! this is a leaf node */ assert(cur != BVH8::emptyNode); STAT3(normal.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); size_t lazy_node = 0; PrimitiveIntersector::intersect(pre,ray,prim,num,bvh->scene,lazy_node); ray_far = ray.tfar; if (unlikely(lazy_node)) { stackPtr->ptr = lazy_node; stackPtr->dist = inf; stackPtr++; } } AVX_ZERO_UPPER(); }
void BVH8Intersector1<robust,PrimitiveIntersector>::occluded(const BVH8* bvh, Ray& ray) { /*! perform per ray precalculations required by the primitive intersector */ Precalculations pre(ray,bvh); /*! stack state */ NodeRef stack[stackSize]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSize; stack[0] = bvh->root; /* filter out invalid rays */ #if defined(RTCORE_IGNORE_INVALID_RAYS) if (!ray.valid()) return; #endif /* verify correct input */ assert(ray.tnear > -FLT_MIN); //assert(!(types & BVH4::FLAG_NODE_MB) || (ray.time >= 0.0f && ray.time <= 1.0f)); /*! load the ray into SIMD registers */ const Vec3f8 norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const Vec3f8 rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const Vec3f8 org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const float8 ray_near(ray.tnear); float8 ray_far(ray.tfar); /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_rdir.x >= 0 ? 0*sizeof(float8) : 1*sizeof(float8); const size_t nearY = ray_rdir.y >= 0 ? 2*sizeof(float8) : 3*sizeof(float8); const size_t nearZ = ray_rdir.z >= 0 ? 4*sizeof(float8) : 5*sizeof(float8); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ sizeof(float8), farY = nearY ^ sizeof(float8), farZ = nearZ ^ sizeof(float8); #if defined (__AVX2__) const float8 tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const float8 tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const float8 tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const float8 tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const float8 tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const float8 tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const float8 tNearX = (norg.x + load8f((const char*)node+nearX)) * rdir.x; const float8 tNearY = (norg.y + load8f((const char*)node+nearY)) * rdir.y; const float8 tNearZ = (norg.z + load8f((const char*)node+nearZ)) * rdir.z; const float8 tFarX = (norg.x + load8f((const char*)node+farX )) * rdir.x; const float8 tFarY = (norg.y + load8f((const char*)node+farY )) * rdir.y; const float8 tFarZ = (norg.z + load8f((const char*)node+farZ )) * rdir.z; #endif #if defined(__AVX2__) const float8 tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const float8 tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const bool8 vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xff; #else const float8 tNear = max(tNearX,tNearY,tNearZ,ray_near); const float8 tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const bool8 vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); cur.prefetch(); assert(cur != BVH8::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); c0.prefetch(); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); c1.prefetch(); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH8::emptyNode); assert(c1 != BVH8::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); cur.prefetch(); *stackPtr = cur; stackPtr++; if (likely(mask == 0)) { stackPtr--; continue; } /*! process more than three children */ while(1) { r = __bscf(mask); NodeRef c = node->child(r); c.prefetch(); *stackPtr = c; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1]; stackPtr--; } /*! this is a leaf node */ assert(cur != BVH8::emptyNode); STAT3(shadow.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); size_t lazy_node = 0; if (PrimitiveIntersector::occluded(pre,ray,prim,num,bvh->scene,lazy_node)) { ray.geomID = 0; break; } if (unlikely(lazy_node)) { *stackPtr = (NodeRef)lazy_node; stackPtr++; } } AVX_ZERO_UPPER(); }
/*! Recursively applies tree rotations to the subtree rooted at \a nodeID.
 *
 *  The four children are rotated first. Then, for every inner child, each
 *  of the node's boxes is tried in each of that child's four slots; the
 *  exchange with the largest reduction of the child's merged half area is
 *  performed, provided it respects maxDepth.
 *
 *  \param nodeID  subtree root; leaves are returned immediately
 *  \param depth   depth of \a nodeID, used for the maxDepth constraint
 *  \return 0 for a leaf, otherwise 1 + the largest tracked child depth */
size_t BVH4MB::rotate(Base* nodeID, size_t depth)
{
  /*! a leaf has nothing below it that could be rotated */
  if (nodeID->isLeaf())
    return 0;

  Node* top = nodeID->node();

  /*! optimize the four subtrees first and record the depth each one reports */
  ssei subDepth;
  for (size_t k=0; k<4; k++)
    subDepth[k] = (int)rotate(top->child[k],depth+1);

  /* half-area term of every box currently stored in this node */
  ssef extX = top->upper_x-top->lower_x;
  ssef extY = top->upper_y-top->lower_y;
  ssef extZ = top->upper_z-top->lower_z;
  ssef curArea = extX*(extY + extZ) + extY*extZ;

  /*! regroup the per-axis node bounds into one box per child */
  ssef topLo0,topLo1,topLo2,topLo3;
  transpose(top->lower_x,top->lower_y,top->lower_z,ssef(zero),topLo0,topLo1,topLo2,topLo3);
  ssef topHi0,topHi1,topHi2,topHi3;
  transpose(top->upper_x,top->upper_y,top->upper_z,ssef(zero),topHi0,topHi1,topHi2,topHi3);
  BBox<ssef> box0(topLo0,topHi0);
  BBox<ssef> box1(topLo1,topHi1);
  BBox<ssef> box2(topLo2,topHi2);
  BBox<ssef> box3(topLo3,topHi3);

  /*! Search for the best rotation: one of this node's children is
      exchanged with a grandchild that lives below a different child. */
  float bestDelta = pos_inf;
  int swapChild = -1, swapSlot = -1, swapOther = -1;
  for (size_t c=0; c<4; c++)
  {
    /*! leaves have no slots to exchange with */
    if (top->child[c]->isLeaf())
      continue;
    Node* mid = top->child[c]->node();

    /*! regroup the grandchild bounds the same way */
    ssef midLo0,midLo1,midLo2,midLo3;
    transpose(mid->lower_x,mid->lower_y,mid->lower_z,ssef(zero),midLo0,midLo1,midLo2,midLo3);
    ssef midHi0,midHi1,midHi2,midHi3;
    transpose(mid->upper_x,mid->upper_y,mid->upper_z,ssef(zero),midHi0,midHi1,midHi2,midHi3);
    BBox<ssef> sub0(midLo0,midHi0);
    BBox<ssef> sub1(midLo1,midHi1);
    BBox<ssef> sub2(midLo2,midHi2);
    BBox<ssef> sub3(midLo3,midHi3);

    /*! box0 substituted into each slot of mid */
    float a00 = halfArea3f(merge(box0,sub1,sub2,sub3));
    float a01 = halfArea3f(merge(sub0,box0,sub2,sub3));
    float a02 = halfArea3f(merge(sub0,sub1,box0,sub3));
    float a03 = halfArea3f(merge(sub0,sub1,sub2,box0));
    ssef areas0 = ssef(a00,a01,a02,a03);
    ssef least0 = vreduce_min(areas0);
    int slot0 = (int)__bsf(movemask(least0 == areas0));

    /*! box1 substituted into each slot of mid */
    float a10 = halfArea3f(merge(box1,sub1,sub2,sub3));
    float a11 = halfArea3f(merge(sub0,box1,sub2,sub3));
    float a12 = halfArea3f(merge(sub0,sub1,box1,sub3));
    float a13 = halfArea3f(merge(sub0,sub1,sub2,box1));
    ssef areas1 = ssef(a10,a11,a12,a13);
    ssef least1 = vreduce_min(areas1);
    int slot1 = (int)__bsf(movemask(least1 == areas1));

    /*! box2 substituted into each slot of mid */
    float a20 = halfArea3f(merge(box2,sub1,sub2,sub3));
    float a21 = halfArea3f(merge(sub0,box2,sub2,sub3));
    float a22 = halfArea3f(merge(sub0,sub1,box2,sub3));
    float a23 = halfArea3f(merge(sub0,sub1,sub2,box2));
    ssef areas2 = ssef(a20,a21,a22,a23);
    ssef least2 = vreduce_min(areas2);
    int slot2 = (int)__bsf(movemask(least2 == areas2));

    /*! box3 substituted into each slot of mid */
    float a30 = halfArea3f(merge(box3,sub1,sub2,sub3));
    float a31 = halfArea3f(merge(sub0,box3,sub2,sub3));
    float a32 = halfArea3f(merge(sub0,sub1,box3,sub3));
    float a33 = halfArea3f(merge(sub0,sub1,sub2,box3));
    ssef areas3 = ssef(a30,a31,a32,a33);
    ssef least3 = vreduce_min(areas3);
    int slot3 = (int)__bsf(movemask(least3 == areas3));

    /*! best achievable area per candidate box, and the slot that yields it */
    ssef perBox = ssef(extract<0>(least0),extract<0>(least1),extract<0>(least2),extract<0>(least3));
    int slotOf[4] = { slot0,slot1,slot2,slot3 };

    /* a box pushed down one level must still satisfy the depth limit */
    sseb allowed = ssei(int(depth+1))+subDepth <= ssei(maxDepth);
    if (none(allowed))
      continue;

    size_t pick = select_min(allowed,perBox);
    float delta = perBox[pick]-curArea[c]; //< growing the child's bound is bad, shrinking it is good

    /*! skip a node paired with itself and anything that is not a strict improvement */
    if (pick == c || !(delta < bestDelta))
      continue;

    bestDelta = delta;
    swapChild = (int)c;
    swapOther = (int)pick;
    swapSlot  = slotOf[pick];
  }

  /*! keep the tree as it is when no candidate lowers the cost */
  if (bestDelta >= 0)
    return 1+reduce_max(subDepth);

  /*! carry out the winning rotation */
  Node* pivot = top->child[swapChild]->node();
  swap(top,swapOther,pivot,swapSlot);

  /* refit the bounds this node stores for the modified child */
  top->lower_x[swapChild] = reduce_min(pivot->lower_x);
  top->lower_y[swapChild] = reduce_min(pivot->lower_y);
  top->lower_z[swapChild] = reduce_min(pivot->lower_z);
  top->upper_x[swapChild] = reduce_max(pivot->upper_x);
  top->upper_y[swapChild] = reduce_max(pivot->upper_y);
  top->upper_z[swapChild] = reduce_max(pivot->upper_z);
  top->lower_dx[swapChild] = reduce_min(pivot->lower_dx);
  top->lower_dy[swapChild] = reduce_min(pivot->lower_dy);
  top->lower_dz[swapChild] = reduce_min(pivot->lower_dz);
  top->upper_dx[swapChild] = reduce_max(pivot->upper_dx);
  top->upper_dy[swapChild] = reduce_max(pivot->upper_dy);
  top->upper_dz[swapChild] = reduce_max(pivot->upper_dz);

  /*! The reported depth is conservative: the subtree that moved up may
   *  have been the deepest one, yet its entry is not decremented. */
  subDepth[swapOther]++; // swapOther now sits one level lower
  return 1+reduce_max(subDepth);
}
/*! Closest-hit traversal of a BVH4 for a packet of four rays.
 *
 *  Nodes are traversed for the whole packet using two parallel stacks
 *  (node reference and per-lane entry distance). When at most
 *  SWITCH_THRESHOLD lanes are still interested in a popped node, those
 *  lanes are traversed one by one through BVH4Intersector1::intersect1().
 *
 *  \param This     intersector instance providing the BVH
 *  \param ray      ray packet; tfar is overwritten on entry and re-read
 *                  after every single-ray call and leaf intersection
 *  \param valid_i  lane mask of the rays that take part */
void BVH4Intersector4Hybrid<TriangleIntersector4>::intersect(const BVH4Intersector4Hybrid* This, Ray4& ray, const __m128 valid_i)
{
  sseb lanes = valid_i;
  const BVH4* bvh = This->bvh;
  STAT3(normal.travs,1,popcnt(lanes),4);

  /* marker stored at the bottom of the stack; popping it ends traversal */
  NodeRef sentinel = (NodeRef)1;

  /* load ray into registers; masked-out lanes get the empty interval [+inf,-inf] */
  ssef tmin = select(lanes,ray.tnear,pos_inf);
  ssef tmax = select(lanes,ray.tfar ,neg_inf);
  sse3f invDir = rcp_safe(ray.dir);
  ray.tfar = tmax;

  /* allocate stack and push root node */
  NodeRef nodeStack[3*BVH4::maxDepth+1];
  ssef    distStack[3*BVH4::maxDepth+1];
  nodeStack[0] = sentinel;
  distStack[0] = ssef(inf);
  nodeStack[1] = bvh->root;
  distStack[1] = tmin;
  NodeRef* nodeTop = nodeStack+2;
  ssef*    distTop = distStack+2;

  while (1)
  {
    /* pop next node from stack */
    nodeTop--;
    distTop--;
    ssef    headDist = *distTop;
    NodeRef headNode = *nodeTop;
    if (unlikely(headNode == sentinel))
      break;

    /* cull node if behind closest hit point */
    const sseb alive = headDist < tmax;
    if (unlikely(none(alive)))
      continue;

    /* switch to single ray traversal */
    size_t laneBits = movemask(alive);
    if (unlikely(__popcnt(laneBits) <= SWITCH_THRESHOLD))
    {
      for (size_t lane=__bsf(laneBits); laneBits!=0; laneBits=__btc(laneBits,lane), lane=__bsf(laneBits))
        BVH4Intersector1<TriangleIntersector1>::intersect1(bvh,headNode,lane,ray,invDir);

      /* pick up the distances shortened by the single-ray kernel */
      tmax = ray.tfar;
      continue;
    }

    while (1)
    {
      /* test if this is a leaf node */
      if (unlikely(headNode.isLeaf()))
        break;

      const Node* const node = headNode.node(bvh->nodePtr());

      /* pop of next node: it competes with the children tested below */
      nodeTop--;
      distTop--;
      headNode = *nodeTop;
      headDist = *distTop;

      for (unsigned k=0; k<4; k++)
      {
        /* slab distances of the packet against child box k */
        const ssef txA = (ssef(node->lower_x[k]) - ray.org.x) * invDir.x;
        const ssef txB = (ssef(node->upper_x[k]) - ray.org.x) * invDir.x;
        const ssef tyA = (ssef(node->lower_y[k]) - ray.org.y) * invDir.y;
        const ssef tyB = (ssef(node->upper_y[k]) - ray.org.y) * invDir.y;
        const ssef tzA = (ssef(node->lower_z[k]) - ray.org.z) * invDir.z;
        const ssef tzB = (ssef(node->upper_z[k]) - ray.org.z) * invDir.z;
        const NodeRef child = node->child(k);

        const ssef xIn  = min(txA,txB);
        const ssef xOut = max(txA,txB);
        const ssef yIn  = min(tyA,tyB);
        const ssef yOut = max(tyA,tyB);
        const ssef zIn  = min(tzA,tzB);
        const ssef zOut = max(tzA,tzB);

        const ssef tEnter = max(xIn ,yIn ,zIn ,tmin);
        const ssef tLeave = min(xOut,yOut,zOut,tmax);
        const sseb hitLanes = tEnter <= tLeave;
        const ssef childDist = select(hitLanes,tEnter,inf);
        const sseb nearer = childDist < headDist;

        /* children missed by every lane are dropped */
        if (!likely(any(hitLanes)))
          continue;

        if (any(nearer))
        {
          /* the child becomes the node to continue with; the previous
             candidate goes back onto the stack */
          *nodeTop++ = headNode;
          *distTop++ = headDist;
          headDist = childDist;
          headNode = child;
        }
        else
        {
          /* push hit child onto stack */
          *nodeTop++ = child;
          *distTop++ = childDist;
        }
      }
    }

    /* return if stack is empty */
    if (unlikely(headNode == sentinel))
      break;

    /* decode leaf node */
    size_t count;
    Triangle* tris = (Triangle*) headNode.leaf(bvh->triPtr(),count);

    /* intersect triangles */
    for (size_t t=0; t<count; t++)
      TriangleIntersector4::intersect(lanes,ray,tris[t],bvh->vertices);

    /* hits may have shortened the rays */
    tmax = ray.tfar;
  }
}
/*! Recursively applies tree rotations to the subtree below \a parentRef.
 *
 *  After rotating the four children, every child of this node (child1) is
 *  tried in every slot of every other inner child (child2). The exchange
 *  that shrinks child2's merged half area the most is performed, subject
 *  to BVH4::maxBuildDepth.
 *
 *  \param parentRef  subtree root; barriers and leaves return immediately
 *  \param depth      depth of \a parentRef, used for the depth constraint
 *  \return 0 for a barrier or leaf, otherwise 1 + the largest tracked
 *          child depth */
size_t BVHNRotate<4>::rotate(NodeRef parentRef, size_t depth)
{
  /*! barriers and leaves have nothing below them that could be rotated */
  if (parentRef.isBarrier())
    return 0;
  if (parentRef.isLeaf())
    return 0;

  Node* top = parentRef.node();

  /*! optimize the four subtrees first and record the depth each one reports */
  vint4 subDepth;
  for (size_t k=0; k<4; k++)
    subDepth[k] = (int)rotate(top->child(k),depth+1);

  /* half-area term of every box currently stored in this node */
  vfloat4 extX = top->upper_x-top->lower_x;
  vfloat4 extY = top->upper_y-top->lower_y;
  vfloat4 extZ = top->upper_z-top->lower_z;
  vfloat4 curArea = extX*(extY + extZ) + extY*extZ;

  /*! bounds of the four children of this node */
  BBox<vfloat4> outer0,outer1,outer2,outer3;
  top->bounds(outer0,outer1,outer2,outer3);

  /*! Search for the best rotation: a child of this node (child1) is
      exchanged with a child of one of its siblings (child2). Only swaps
      with a negative area change are recorded, hence the start value 0. */
  float  minDelta = 0;
  size_t pickChild1 = -1, pickChild2 = -1, pickSlot = -1;
  for (size_t c2=0; c2<4; c2++)
  {
    /*! barriers and leaves have no slots to exchange with */
    if (top->child(c2).isBarrier())
      continue;
    if (top->child(c2).isLeaf())
      continue;
    Node* sibling = top->child(c2).node();

    /*! bounds of the sibling's four children */
    BBox<vfloat4> inner0,inner1,inner2,inner3;
    sibling->bounds(inner0,inner1,inner2,inner3);

    /*! outer0 substituted into each slot of the sibling */
    float a00 = halfArea3f(merge(outer0,inner1,inner2,inner3));
    float a01 = halfArea3f(merge(inner0,outer0,inner2,inner3));
    float a02 = halfArea3f(merge(inner0,inner1,outer0,inner3));
    float a03 = halfArea3f(merge(inner0,inner1,inner2,outer0));
    vfloat4 areas0 = vfloat4(a00,a01,a02,a03);
    vfloat4 least0 = vreduce_min(areas0);
    int slot0 = (int)__bsf(movemask(least0 == areas0));

    /*! outer1 substituted into each slot of the sibling */
    float a10 = halfArea3f(merge(outer1,inner1,inner2,inner3));
    float a11 = halfArea3f(merge(inner0,outer1,inner2,inner3));
    float a12 = halfArea3f(merge(inner0,inner1,outer1,inner3));
    float a13 = halfArea3f(merge(inner0,inner1,inner2,outer1));
    vfloat4 areas1 = vfloat4(a10,a11,a12,a13);
    vfloat4 least1 = vreduce_min(areas1);
    int slot1 = (int)__bsf(movemask(least1 == areas1));

    /*! outer2 substituted into each slot of the sibling */
    float a20 = halfArea3f(merge(outer2,inner1,inner2,inner3));
    float a21 = halfArea3f(merge(inner0,outer2,inner2,inner3));
    float a22 = halfArea3f(merge(inner0,inner1,outer2,inner3));
    float a23 = halfArea3f(merge(inner0,inner1,inner2,outer2));
    vfloat4 areas2 = vfloat4(a20,a21,a22,a23);
    vfloat4 least2 = vreduce_min(areas2);
    int slot2 = (int)__bsf(movemask(least2 == areas2));

    /*! outer3 substituted into each slot of the sibling */
    float a30 = halfArea3f(merge(outer3,inner1,inner2,inner3));
    float a31 = halfArea3f(merge(inner0,outer3,inner2,inner3));
    float a32 = halfArea3f(merge(inner0,inner1,outer3,inner3));
    float a33 = halfArea3f(merge(inner0,inner1,inner2,outer3));
    vfloat4 areas3 = vfloat4(a30,a31,a32,a33);
    vfloat4 least3 = vreduce_min(areas3);
    int slot3 = (int)__bsf(movemask(least3 == areas3));

    /*! area change per candidate child1, and the slot that achieves it */
    vfloat4 delta0123 = vfloat4(extract<0>(least0),extract<0>(least1),extract<0>(least2),extract<0>(least3)) - vfloat4(curArea[c2]);
    int slotOf[4] = { slot0,slot1,slot2,slot3 };

    /* a child pushed down one level must still satisfy the build depth limit */
    const size_t depthLimit = BVH4::maxBuildDepth;
    vbool4 allowed = vint4(int(depth+1))+subDepth <= vint4(depthLimit);

    /* a child cannot be moved below itself */
    allowed &= vint4(c2) != vint4(step);
    if (none(allowed))
      continue;

    size_t c1 = select_min(allowed,delta0123);
    if (c1 == c2)
      continue; // can happen if bounds are NANs
    float delta = delta0123[c1];

    /*! only a strictly better swap replaces the current best */
    if (!(delta < minDelta))
      continue;

    minDelta   = delta;
    pickChild1 = c1;
    pickChild2 = c2;
    pickSlot   = slotOf[c1];
  }

  /*! keep the tree as it is when no candidate lowers the cost */
  if (pickChild1 == size_t(-1))
    return 1+reduce_max(subDepth);

  /*! carry out the winning rotation and refit the modified child */
  Node* sibling = top->child(pickChild2).node();
  BVH4::swap(top,pickChild1,sibling,pickSlot);
  top->set(pickChild2,sibling->bounds());
  BVH4::compact(top);
  BVH4::compact(sibling);

  /*! The reported depth is conservative: the subtree that moved up may
   *  have been the deepest one, yet its entry is not decremented. */
  subDepth[pickChild1]++; // pickChild1 now sits one level lower
  return 1+reduce_max(subDepth);
}
void BVH4Intersector1<PrimitiveIntersector>::intersect(const BVH4* bvh, Ray& ray) { /*! stack state */ StackItemInt32<NodeRef> stack[stackSize]; //!< stack of nodes StackItemInt32<NodeRef>* stackPtr = stack+1; //!< current stack pointer StackItemInt32<NodeRef>* stackEnd = stack+stackSize; stack[0].ptr = bvh->root; stack[0].dist = neg_inf; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray.dir.x >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray.dir.y >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray.dir.z >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef); #if 0 // FIXME: why is this slower /*! load the ray */ Vec3fa ray_org = ray.org; Vec3fa ray_dir = ray.dir; ssef ray_near = max(ray.tnear,FLT_MIN); // we do not support negative tnear values in this kernel due to integer optimizations ssef ray_far = ray.tfar; #if defined(__FIX_RAYS__) const float float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,Vec3fa(-float_range),Vec3fa(+float_range)); ray_dir = clamp(ray_dir,Vec3fa(-float_range),Vec3fa(+float_range)); ray_far = min(ray_far,float(inf)); #endif const Vec3fa ray_rdir = rcp_safe(ray_dir); const sse3f org(ray_org), dir(ray_dir); const sse3f norg(-ray_org), rdir(ray_rdir), org_rdir(ray_org*ray_rdir); #else /*! load the ray into SIMD registers */ const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const ssef ray_near(ray.tnear); ssef ray_far(ray.tfar); #endif /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(*(float*)&stackPtr->dist > ray.tfar)) continue; /* downtraversal loop */ while (true) { /*! 
stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,ray_near); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); c = node->child(r); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! this is a leaf node */ STAT3(normal.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); PrimitiveIntersector::intersect(ray,prim,num,bvh->geometry); ray_far = ray.tfar; } }
void BVH8Intersector8Hybrid<PrimitiveIntersector8>::occluded(bool8* valid_i, BVH8* bvh, Ray8& ray) { /* load ray */ const bool8 valid = *valid_i; bool8 terminated = !valid; Vec3f8 ray_org = ray.org, ray_dir = ray.dir; float8 ray_tnear = ray.tnear, ray_tfar = ray.tfar; const Vec3f8 rdir = rcp_safe(ray_dir); const Vec3f8 org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid,ray_tnear,float8(pos_inf)); ray_tfar = select(valid,ray_tfar ,float8(neg_inf)); const float8 inf = float8(pos_inf); Precalculations pre(valid,ray); /* compute near/far per ray */ Vec3i8 nearXYZ; nearXYZ.x = select(rdir.x >= 0.0f,int8(0*(int)sizeof(float8)),int8(1*(int)sizeof(float8))); nearXYZ.y = select(rdir.y >= 0.0f,int8(2*(int)sizeof(float8)),int8(3*(int)sizeof(float8))); nearXYZ.z = select(rdir.z >= 0.0f,int8(4*(int)sizeof(float8)),int8(5*(int)sizeof(float8))); /* allocate stack and push root node */ float8 stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH8::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; float8* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH8::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ float8 curDist = *sptr_near; const bool8 active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { if (occluded1(bvh,cur,i,pre,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar,nearXYZ)) terminated[i] = -1; } if (all(terminated)) break; ray_tfar = 
select(terminated,float8(neg_inf),ray_tfar); continue; } #endif while (1) { /* test if this is a leaf node */ if (unlikely(cur.isLeaf())) break; const bool8 valid_node = ray_tfar > curDist; STAT3(shadow.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = (Node*)cur.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; for (unsigned i=0; i<BVH8::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH8::emptyNode)) break; #if defined(__AVX2__) const float8 lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const float8 lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const float8 lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const float8 lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const float8 lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const float8 lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const float8 lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const float8 lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const bool8 lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const float8 lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const float8 lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const float8 lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const float8 lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const float8 lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const float8 lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; const float8 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const float8 lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const bool8 lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that 
child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH8::emptyNode); const float8 childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* return if stack is empty */ if (unlikely(cur == BVH8::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ assert(cur != BVH8::emptyNode); const bool8 valid_leaf = ray_tfar > curDist; STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8); size_t items; const Triangle* prim = (Triangle*) cur.leaf(items); terminated |= PrimitiveIntersector8::occluded(!terminated,pre,ray,prim,items,bvh->scene); if (all(terminated)) break; ray_tfar = select(terminated,float8(neg_inf),ray_tfar); } store8i(valid & terminated,&ray.geomID,0); AVX_ZERO_UPPER(); }
/*! Single-ray traversal of a BVH4i. Every triangle of every leaf that is
 *  reached is handed to TriangleIntersector::intersect; ray.tfar is re-read
 *  after each leaf and used to cull the subtrees still waiting on the stack. */
void BVH4iIntersector1<TriangleIntersector>::intersect(const BVH4iIntersector1* This, Ray& ray)
{
  AVX_ZERO_UPPER();
  STAT3(normal.travs,1,1,1);

  const BVH4i* bvh = This->bvh;

  /* traversal stack; the first entry is the root, tagged with the smallest distance */
  StackItem stack[1+3*BVH4i::maxDepth];
  StackItem* top = stack+1;
  stack[0].ptr  = bvh->root;
  stack[0].dist = neg_inf;

  /* byte offsets inside a node of the bound that is entered first on each axis ... */
  const size_t nearX = ray.dir.x >= 0.0f ? 0*sizeof(ssef_m) : 1*sizeof(ssef_m);
  const size_t nearY = ray.dir.y >= 0.0f ? 2*sizeof(ssef_m) : 3*sizeof(ssef_m);
  const size_t nearZ = ray.dir.z >= 0.0f ? 4*sizeof(ssef_m) : 5*sizeof(ssef_m);

  /* ... and of the opposite bound (toggling 16 assumes sizeof(ssef_m) == 16 -- TODO confirm) */
  const size_t farX = nearX ^ 16;
  const size_t farY = nearY ^ 16;
  const size_t farZ = nearZ ^ 16;

  /* broadcast the ray into SIMD registers */
  const Vector3f ray_rdir     = rcp_safe(ray.dir);
  const Vector3f ray_org_rdir = ray.org*ray_rdir;
  const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z);
  const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
  const ssef rayNear(ray.tnear);
  ssef rayFar(ray.tfar);

  const void* nodePtr = bvh->nodePtr();
  const void* triPtr  = bvh->triPtr();

  /* outer loop: take the next pending subtree from the stack */
  while (true) next_entry:
  {
    if (unlikely(top == stack)) break;
    top--;
    NodeRef ref = NodeRef(top->ptr);

    /* skip subtrees that start behind the current hit distance */
    if (unlikely(top->dist > ray.tfar)) continue;

    /* inner loop: descend until a leaf is reached */
    while (true)
    {
      if (unlikely(ref.isLeaf())) break;
      STAT3(normal.trav_nodes,1,1,1);

      /* slab test of the ray against the four child boxes of this node */
      const Node* inner = ref.node(nodePtr);
      const char* const planes = (const char*)nodePtr+(size_t)ref;
#if defined (__AVX2__)
      const ssef tNearX = msub(ssef(planes+nearX), rdir.x, org_rdir.x);
      const ssef tNearY = msub(ssef(planes+nearY), rdir.y, org_rdir.y);
      const ssef tNearZ = msub(ssef(planes+nearZ), rdir.z, org_rdir.z);
      const ssef tFarX  = msub(ssef(planes+farX ), rdir.x, org_rdir.x);
      const ssef tFarY  = msub(ssef(planes+farY ), rdir.y, org_rdir.y);
      const ssef tFarZ  = msub(ssef(planes+farZ ), rdir.z, org_rdir.z);
#else
      const ssef tNearX = (norg.x + ssef(planes+nearX)) * rdir.x;
      const ssef tNearY = (norg.y + ssef(planes+nearY)) * rdir.y;
      const ssef tNearZ = (norg.z + ssef(planes+nearZ)) * rdir.z;
      const ssef tFarX  = (norg.x + ssef(planes+farX )) * rdir.x;
      const ssef tFarY  = (norg.y + ssef(planes+farY )) * rdir.y;
      const ssef tFarZ  = (norg.z + ssef(planes+farZ )) * rdir.z;
#endif
      const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear);
      const ssef tFar  = min(tFarX ,tFarY ,tFarZ ,rayFar);
      size_t hits = movemask(tNear <= tFar);

      /* nothing hit: go back to the stack */
      if (unlikely(hits == 0)) goto next_entry;

      /* exactly one child hit: descend into it directly */
      size_t slot = __bsf(hits);
      hits = __btc(hits,slot);
      if (likely(hits == 0)) {
        ref = inner->child(slot);
        continue;
      }

      /* two children hit: descend into the nearer one, defer the other */
      NodeRef c0 = inner->child(slot);
      const float d0 = tNear[slot];
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef c1 = inner->child(slot);
      const float d1 = tNear[slot];
      if (likely(hits == 0)) {
        if (d0 < d1) { top->ptr = c1; top->dist = d1; top++; ref = c0; }
        else         { top->ptr = c0; top->dist = d0; top++; ref = c1; }
        continue;
      }

      /* slow path (3 or 4 hits): push everything, then sort on the stack */
      top->ptr = c0; top->dist = d0; top++;
      top->ptr = c1; top->dist = d1; top++;

      /* third hit child */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef c = inner->child(slot);
      float d = tNear[slot];
      top->ptr = c; top->dist = d; top++;
      if (likely(hits == 0)) {
        sort(top[-1],top[-2],top[-3]);
        top--;
        ref = (NodeRef) top->ptr;
        continue;
      }

      /* fourth hit child */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      c = inner->child(slot);
      d = tNear[slot];
      top->ptr = c; top->dist = d; top++;
      sort(top[-1],top[-2],top[-3],top[-4]);
      top--;
      ref = (NodeRef) top->ptr;
    }

    /* leaf: intersect all of its triangles, then pick up the possibly updated far distance */
    STAT3(normal.trav_leaves,1,1,1);
    size_t num;
    Triangle* tri = (Triangle*) ref.leaf(triPtr,num);
    for (size_t i=0; i<num; i++)
      TriangleIntersector::intersect(ray,tri[i],bvh->vertices);
    rayFar = ray.tfar;
  }
  AVX_ZERO_UPPER();
}
__forceinline void BVH8iIntersector8Hybrid<TriangleIntersector8>::intersect1(const BVH8i* bvh, NodeRef root, const size_t k, Ray8& ray,const avx3f &ray_org, const avx3f &ray_dir, const avx3f &ray_rdir, const avxf &ray_tnear, const avxf &ray_tfar, const avx3i& nearXYZ) { /*! stack state */ StackItemInt64 stack[stackSizeSingle]; //!< stack of nodes StackItemInt64* stackPtr = stack+1; //!< current stack pointer StackItemInt64* stackEnd = stack+stackSizeSingle; stack[0].ptr = root; stack[0].dist = neg_inf; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = nearXYZ.x[k]; const size_t nearY = nearXYZ.y[k]; const size_t nearZ = nearXYZ.z[k]; /*! load the ray into SIMD registers */ const avx3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const avx3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const avx3f org_rdir(org*rdir); avxf rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr(); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k])) continue; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = (Node*)cur.node(nodes); const size_t farX = nearX ^ sizeof(avxf), farY = nearY ^ sizeof(avxf), farZ = nearZ ^ sizeof(avxf); #if defined (__AVX2__) const avxf tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const avxf tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const avxf tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const avxf tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const avxf tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const avxf tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const avxf tNearX = (load8f((const char*)node+nearX) - org.x) * rdir.x; const avxf tNearY = (load8f((const char*)node+nearY) - org.y) * rdir.y; const avxf tNearZ = (load8f((const char*)node+nearZ) - org.z) * rdir.z; const avxf tFarX = (load8f((const char*)node+farX ) - org.x) * rdir.x; const avxf tFarY = (load8f((const char*)node+farY ) - org.y) * rdir.y; const avxf tFarZ = (load8f((const char*)node+farZ ) - org.z) * rdir.z; #endif #if defined(__AVX2__) const avxf tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const avxf tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const avxb vmask = cast(tNear) > cast(tFar); unsigned int mask = movemask(vmask)^0xff; #else const avxf tNear = max(tNearX,tNearY,tNearZ,rayNear); const avxf tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const avxb vmask = tNear <= tFar; unsigned int mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4i::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4i::emptyNode); assert(c1 != BVH4i::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c0 != BVH4i::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); c = node->child(r); d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4i::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } while(1) { r = __bscf(mask); c = node->child(r); d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! 
this is a leaf node */ STAT3(normal.trav_leaves,1,1,1); size_t num; Triangle* prim = (Triangle*) cur.leaf(accel,num); TriangleIntersector8::intersect(ray,k,prim,num,bvh->geometry); rayFar = ray.tfar[k]; } }
/*! Single-ray occlusion query against a BVH4, testing the lower and upper
 *  bounds of the four children together in one 8-wide register. swapX/Y/Z
 *  (declared outside this block) choose, per axis, whether the two halves are
 *  exchanged. Returns true as soon as TriangleIntersector::occluded reports a
 *  hit, false once the whole stack has been processed. */
__forceinline bool occludedT(const BVH4* bvh, Ray& ray)
{
  typedef typename BVH4::NodeRef NodeRef;
  typedef typename BVH4::Node Node;
  typedef typename TriangleIntersector::Triangle Triangle;
  typedef StackItemT<size_t> StackItem;

  /* stack of subtrees that still have to be visited, seeded with the root */
  NodeRef stack[1+3*BVH4::maxDepth];
  NodeRef* top = stack+1;
  stack[0] = bvh->root;

  /* sign masks: the half that takes the "far" role gets its sign flipped, so
     that a single max() serves both the near and the (negated) far distances */
  const avxf pos_neg = avxf(ssef(+0.0f),ssef(-0.0f));
  const avxf neg_pos = avxf(ssef(-0.0f),ssef(+0.0f));
  const avxf flipSignX = swapX ? neg_pos : pos_neg;
  const avxf flipSignY = swapY ? neg_pos : pos_neg;
  const avxf flipSignZ = swapZ ? neg_pos : pos_neg;

  /* broadcast the ray into SIMD registers */
  const Vector3f ray_rdir = rcp_safe(ray.dir);
  const avx3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const avx3f rdir(ray_rdir.x^flipSignX,ray_rdir.y^flipSignY,ray_rdir.z^flipSignZ);
  const avx3f org_rdir(avx3f(ray.org.x,ray.org.y,ray.org.z)*rdir);
  const avxf rayNearFar(ssef(ray.tnear),-ssef(ray.tfar));

  const void* nodePtr = bvh->nodePtr();
  const void* triPtr  = bvh->triPtr();

  /* outer loop: take the next pending subtree from the stack */
  while (true) next_entry:
  {
    if (unlikely(top == stack)) break;
    top--;
    NodeRef ref = (NodeRef) *top;

    /* inner loop: descend until a leaf is reached */
    while (true)
    {
      if (unlikely(ref.isLeaf())) break;
      STAT3(shadow.trav_nodes,1,1,1);

      /* slab test against all four child boxes at once */
      const Node* inner = ref.node(nodePtr);
#if defined (__AVX2__) || defined(__MIC__)
      const avxf tLowerUpperX = msub(avxf::load(&inner->lower_x), rdir.x, org_rdir.x);
      const avxf tLowerUpperY = msub(avxf::load(&inner->lower_y), rdir.y, org_rdir.y);
      const avxf tLowerUpperZ = msub(avxf::load(&inner->lower_z), rdir.z, org_rdir.z);
#else
      const avxf tLowerUpperX = (norg.x + avxf::load(&inner->lower_x)) * rdir.x;
      const avxf tLowerUpperY = (norg.y + avxf::load(&inner->lower_y)) * rdir.y;
      const avxf tLowerUpperZ = (norg.z + avxf::load(&inner->lower_z)) * rdir.z;
#endif
      const avxf tNearFarX = swapX ? shuffle<1,0>(tLowerUpperX) : tLowerUpperX;
      const avxf tNearFarY = swapY ? shuffle<1,0>(tLowerUpperY) : tLowerUpperY;
      const avxf tNearFarZ = swapZ ? shuffle<1,0>(tLowerUpperZ) : tLowerUpperZ;
      const avxf tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,rayNearFar);
      const ssef tNear = extract<0>(tNearFar);
      const ssef tFar  = extract<1>(tNearFar);
      size_t hits = movemask(-tNear >= tFar);

      /* nothing hit: go back to the stack */
      if (unlikely(hits == 0)) goto next_entry;

      /* exactly one child hit: descend into it directly */
      size_t slot = __bsf(hits);
      hits = __btc(hits,slot);
      if (likely(hits == 0)) {
        ref = inner->child(slot);
        continue;
      }

      /* two children hit: descend into the nearer one, defer the other */
      NodeRef c0 = inner->child(slot);
      const float d0 = tNear[slot];
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef c1 = inner->child(slot);
      const float d1 = tNear[slot];
      if (likely(hits == 0)) {
        if (d0 < d1) { *top = c1; top++; ref = c0; }
        else         { *top = c0; top++; ref = c1; }
        continue;
      }

      /* three or four hits: the first two are deferred without sorting */
      *top = c0; top++;
      *top = c1; top++;

      /* three hits: continue with the third child */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      ref = inner->child(slot);
      if (likely(hits == 0)) continue;

      /* four hits: defer the third child as well and continue with child 3 */
      *top = ref; top++;
      ref = inner->child(3);
    }

    /* leaf: the first occluding triangle ends the query */
    STAT3(shadow.trav_leaves,1,1,1);
    size_t num;
    Triangle* tri = (Triangle*) ref.leaf(triPtr,num);
    for (size_t i=0; i<num; i++) {
      if (!TriangleIntersector::occluded(ray,tri[i],bvh->vertices)) continue;
      AVX_ZERO_UPPER();
      return true;
    }
  }
  AVX_ZERO_UPPER();
  return false;
}
void BVH8iIntersector8Hybrid<TriangleIntersector8>::occluded(avxb* valid_i, BVH8i* bvh, Ray8& ray) { /* load ray */ const avxb valid = *valid_i; avxb terminated = !valid; avx3f ray_org = ray.org, ray_dir = ray.dir; avxf ray_tnear = ray.tnear, ray_tfar = ray.tfar; #if defined(__FIX_RAYS__) const avxf float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,avx3f(-float_range),avx3f(+float_range)); ray_dir = clamp(ray_dir,avx3f(-float_range),avx3f(+float_range)); ray_tnear = max(ray_tnear,FLT_MIN); ray_tfar = min(ray_tfar,float(inf)); #endif const avx3f rdir = rcp_safe(ray_dir); const avx3f org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid,ray_tnear,avxf(pos_inf)); ray_tfar = select(valid,ray_tfar ,avxf(neg_inf)); const avxf inf = avxf(pos_inf); /* compute near/far per ray */ avx3i nearXYZ; nearXYZ.x = select(rdir.x >= 0.0f,avxi(0*(int)sizeof(avxf)),avxi(1*(int)sizeof(avxf))); nearXYZ.y = select(rdir.y >= 0.0f,avxi(2*(int)sizeof(avxf)),avxi(3*(int)sizeof(avxf))); nearXYZ.z = select(rdir.z >= 0.0f,avxi(4*(int)sizeof(avxf)),avxi(5*(int)sizeof(avxf))); /* allocate stack and push root node */ avxf stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH4i::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; avxf* __restrict__ sptr_near = stack_near + 2; const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr(); while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef curNode = *sptr_node; if (unlikely(curNode == BVH4i::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ avxf curDist = *sptr_near; const avxb active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || 
defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { if (occluded1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar,nearXYZ)) terminated[i] = -1; } if (all(terminated)) break; ray_tfar = select(terminated,avxf(neg_inf),ray_tfar); continue; } #endif while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf())) break; const avxb valid_node = ray_tfar > curDist; STAT3(shadow.trav_nodes,1,popcnt(valid_node),8); const Node* __restrict__ const node = (Node*)curNode.node(nodes); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; curNode = *sptr_node; curDist = *sptr_near; for (unsigned i=0; i<8; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4i::emptyNode)) break; #if defined(__AVX2__) const avxf lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const avxf lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const avxf lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const avxf lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const avxf lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const avxf lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); const avxf lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const avxf lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const avxb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const avxf lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const avxf lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const avxf lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const avxf lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const avxf lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const avxf lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; const avxf lnearP = max(max(min(lclipMinX, 
lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const avxf lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const avxb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4i::emptyNode); const avxf childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = curNode; *(sptr_near-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ const avxb valid_leaf = ray_tfar > curDist; STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8); size_t items; const Triangle* prim = (Triangle*) curNode.leaf(accel,items); terminated |= TriangleIntersector8::occluded(!terminated,ray,prim,items,bvh->geometry); if (all(terminated)) break; ray_tfar = select(terminated,avxf(neg_inf),ray_tfar); } store8i(valid & terminated,&ray.geomID,0); AVX_ZERO_UPPER(); }
/*! Single-ray traversal of a BVH4 that tests the lower and upper bounds of
 *  the four children together in one 8-wide register. swapX/Y/Z (declared
 *  outside this block) choose, per axis, whether the two halves are exchanged.
 *  Every triangle of a reached leaf is handed to
 *  TriangleIntersector::intersect, and ray.tfar is re-read after each leaf. */
__forceinline void intersectT(const BVH4* bvh, Ray& ray)
{
  typedef typename BVH4::NodeRef NodeRef;
  typedef typename BVH4::Node Node;
  typedef typename TriangleIntersector::Triangle Triangle;
  typedef StackItemT<size_t> StackItem;

  /* traversal stack; the first entry is the root, tagged with the smallest distance */
  StackItem stack[1+3*BVH4::maxDepth];
  StackItem* top = stack+1;
  stack[0].ptr  = bvh->root;
  stack[0].dist = neg_inf;

  /* sign masks: the half that takes the "far" role gets its sign flipped, so
     that a single max() serves both the near and the (negated) far distances */
  const avxf pos_neg = avxf(ssef(+0.0f),ssef(-0.0f));
  const avxf neg_pos = avxf(ssef(-0.0f),ssef(+0.0f));
  const avxf flipSignX = swapX ? neg_pos : pos_neg;
  const avxf flipSignY = swapY ? neg_pos : pos_neg;
  const avxf flipSignZ = swapZ ? neg_pos : pos_neg;

  /* broadcast the ray into SIMD registers */
  const Vector3f ray_rdir = rcp_safe(ray.dir);
  const avx3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const avx3f rdir(ray_rdir.x^flipSignX,ray_rdir.y^flipSignY,ray_rdir.z^flipSignZ);
  const avx3f org_rdir(avx3f(ray.org.x,ray.org.y,ray.org.z)*rdir);
  avxf rayNearFar(ssef(ray.tnear),-ssef(ray.tfar));

  const void* nodePtr = bvh->nodePtr();
  const void* triPtr  = bvh->triPtr();

  /* outer loop: take the next pending subtree from the stack */
  while (true) next_entry:
  {
    if (unlikely(top == stack)) break;
    top--;
    NodeRef ref = NodeRef(top->ptr);

    /* skip subtrees that start behind the current hit distance */
    if (unlikely(top->dist > ray.tfar)) continue;

    /* inner loop: descend until a leaf is reached */
    while (true)
    {
      if (unlikely(ref.isLeaf())) break;
      STAT3(normal.trav_nodes,1,1,1);

      /* slab test against all four child boxes at once */
      const Node* inner = ref.node(nodePtr);
#if defined (__AVX2__) || defined(__MIC__)
      const avxf tLowerUpperX = msub(avxf::load(&inner->lower_x), rdir.x, org_rdir.x);
      const avxf tLowerUpperY = msub(avxf::load(&inner->lower_y), rdir.y, org_rdir.y);
      const avxf tLowerUpperZ = msub(avxf::load(&inner->lower_z), rdir.z, org_rdir.z);
#else
      const avxf tLowerUpperX = (norg.x + avxf::load(&inner->lower_x)) * rdir.x;
      const avxf tLowerUpperY = (norg.y + avxf::load(&inner->lower_y)) * rdir.y;
      const avxf tLowerUpperZ = (norg.z + avxf::load(&inner->lower_z)) * rdir.z;
#endif
      const avxf tNearFarX = swapX ? shuffle<1,0>(tLowerUpperX) : tLowerUpperX;
      const avxf tNearFarY = swapY ? shuffle<1,0>(tLowerUpperY) : tLowerUpperY;
      const avxf tNearFarZ = swapZ ? shuffle<1,0>(tLowerUpperZ) : tLowerUpperZ;
      const avxf tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,rayNearFar);
      const ssef tNear = extract<0>(tNearFar);
      const ssef tFar  = extract<1>(tNearFar);
      size_t hits = movemask(-tNear >= tFar);

      /* nothing hit: go back to the stack */
      if (unlikely(hits == 0)) goto next_entry;

      /* exactly one child hit: descend into it directly */
      size_t slot = __bsf(hits);
      hits = __btc(hits,slot);
      if (likely(hits == 0)) {
        ref = inner->child(slot);
        continue;
      }

      /* two children hit: descend into the nearer one, defer the other */
      NodeRef c0 = inner->child(slot);
      const float d0 = tNear[slot];
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef c1 = inner->child(slot);
      const float d1 = tNear[slot];
      if (likely(hits == 0)) {
        if (d0 < d1) { top->ptr = c1; top->dist = d1; top++; ref = c0; }
        else         { top->ptr = c0; top->dist = d0; top++; ref = c1; }
        continue;
      }

      /* slow path (3 or 4 hits): push everything, then sort on the stack */
      top->ptr = c0; top->dist = d0; top++;
      top->ptr = c1; top->dist = d1; top++;

      /* third hit child */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef c = inner->child(slot);
      float d = tNear[slot];
      top->ptr = c; top->dist = d; top++;
      if (likely(hits == 0)) {
        sort(top[-1],top[-2],top[-3]);
        top--;
        ref = (NodeRef) top->ptr;
        continue;
      }

      /* fourth hit child */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      c = inner->child(slot);
      d = tNear[slot];
      top->ptr = c; top->dist = d; top++;
      sort(top[-1],top[-2],top[-3],top[-4]);
      top--;
      ref = (NodeRef) top->ptr;
    }

    /* leaf: intersect all of its triangles */
    STAT3(normal.trav_leaves,1,1,1);
    size_t num;
    Triangle* tri = (Triangle*) ref.leaf(triPtr,num);
    for (size_t i=0; i<num; i++)
      TriangleIntersector::intersect(ray,tri[i],bvh->vertices);

    /* refresh the (negated) far half with the current ray.tfar */
    rayNearFar = insert<1>(rayNearFar,-ssef(ray.tfar));
  }
}
void BVH4Intersector4Hybrid<PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray) { /* load ray */ const sseb valid0 = *valid_i; sse3f ray_org = ray.org, ray_dir = ray.dir; ssef ray_tnear = ray.tnear, ray_tfar = ray.tfar; #if defined(__FIX_RAYS__) const ssef float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,sse3f(-float_range),sse3f(+float_range)); ray_dir = clamp(ray_dir,sse3f(-float_range),sse3f(+float_range)); ray_tnear = max(ray_tnear,FLT_MIN); ray_tfar = min(ray_tfar,float(inf)); #endif const sse3f rdir = rcp_safe(ray_dir); const sse3f org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid0,ray_tnear,ssef(pos_inf)); ray_tfar = select(valid0,ray_tfar ,ssef(neg_inf)); const ssef inf = ssef(pos_inf); /* allocate stack and push root node */ ssef stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; ssef* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef curNode = *sptr_node; if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ ssef curDist = *sptr_near; const sseb active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { intersect1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar); } ray_tfar = ray.tfar; continue; } #endif while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf())) break; const sseb valid_node = ray_tfar > curDist; 
STAT3(normal.trav_nodes,1,popcnt(valid_node),4); const Node* __restrict__ const node = curNode.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; curNode = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<4; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; #if defined(__AVX2__) const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); #else const ssef lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const ssef lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const ssef lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const ssef lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const ssef lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const ssef lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; #endif #if defined(__SSE4_1__) const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const ssef lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const sseb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const ssef lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const sseb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); const ssef childDist = 
select(lhit,lnearP,inf); const NodeRef child = node->children[i]; assert(child != BVH4::emptyNode); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = curNode; *(sptr_near-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* return if stack is empty */ if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ const sseb valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(valid_leaf),4); size_t items; const Primitive* prim = (Primitive*) curNode.leaf(items); PrimitiveIntersector4::intersect(valid_leaf,ray,prim,items,bvh->geometry); ray_tfar = select(valid_leaf,ray.tfar,ray_tfar); } AVX_ZERO_UPPER(); }
__forceinline void BVH4Intersector4Hybrid<PrimitiveIntersector4>::intersect1(const BVH4* bvh, NodeRef root, size_t k, Ray4& ray, const sse3f& ray_org, const sse3f& ray_dir, const sse3f& ray_rdir, const ssef& ray_tnear, const ssef& ray_tfar) { /*! stack state */ StackItem stack[stackSizeSingle]; //!< stack of nodes StackItem* stackPtr = stack+1; //!< current stack pointer StackItem* stackEnd = stack+stackSizeSingle; stack[0].ptr = root; stack[0].dist = neg_inf; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_dir.x[k] >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray_dir.y[k] >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray_dir.z[k] >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef); /*! load the ray into SIMD registers */ const sse3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const sse3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const sse3f norg = -org, org_rdir(org*rdir); ssef rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(stackPtr->dist > ray.tfar[k])) continue; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const float d0 = tNear[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const float d1 = tNear[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); float d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); c = node->child(r); d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! this is a leaf node */ STAT3(normal.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); PrimitiveIntersector4::intersect(ray,k,prim,num,bvh->geometry); rayFar = ray.tfar[k]; } }
/*! Occlusion (any-hit) query of a single ray against a motion-blur BVH4.
 *
 *  The hierarchy is walked depth-first with an explicit stack. Every slab
 *  bound of an inner node is evaluated at ray.time as p[0] + ray.time*p[1],
 *  where p points at two consecutive ssef values inside the node (presumably
 *  the bound at the start of the time interval and its change over the
 *  interval - confirm against the BVH4MB node layout).
 *
 *  \param bvh  hierarchy to traverse; bvh->root is the entry point and
 *              bvh->geometry is forwarded to the triangle intersector
 *  \param ray  ray to test; ray.geomID is set to 0 as soon as one triangle
 *              reports occlusion and is left untouched otherwise
 */
void BVH4MBIntersector1<TriangleIntersector>::occluded(const BVH4MB* bvh, Ray& ray)
{
  AVX_ZERO_UPPER();
  STAT3(shadow.travs,1,1,1);

  /*! stack state */
  Base* stack[1+3*BVH4MB::maxDepth];  //!< stack of nodes that still need to get traversed
  Base** stackPtr = stack+1;          //!< current stack pointer
  stack[0] = bvh->root;               //!< push first node onto stack

  /*! offsets to select the side that becomes the lower or upper bound;
   *  each bound occupies two ssef values, hence the factor 2 and the
   *  xor with 32 to flip between the lower and the upper bound */
  const size_t nearX = (ray.dir.x >= 0) ? 0*2*sizeof(ssef) : 1*2*sizeof(ssef);
  const size_t nearY = (ray.dir.y >= 0) ? 2*2*sizeof(ssef) : 3*2*sizeof(ssef);
  const size_t nearZ = (ray.dir.z >= 0) ? 4*2*sizeof(ssef) : 5*2*sizeof(ssef);
  const size_t farX  = nearX ^ 32;
  const size_t farY  = nearY ^ 32;
  const size_t farZ  = nearZ ^ 32;

  /*! load the ray into SIMD registers */
  const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const Vec3fa ray_rdir = rcp_safe(ray.dir);
  const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z);
  const ssef rayNear(ray.tnear);
  const ssef rayFar (ray.tfar);

  /*! pop node from stack */
  while (true)
  {
    /* finish when the stack is empty */
    if (unlikely(stackPtr == stack)) break;
    Base* cur = *(--stackPtr);

    /*! this is an inner node */
    if (likely(cur->isNode()))
    {
      STAT3(shadow.trav_nodes,1,1,1);

      /*! single ray intersection with 4 boxes */
      const Node* node = cur->node();
      const ssef* pNearX = (const ssef*)((const char*)node+nearX);
      const ssef* pNearY = (const ssef*)((const char*)node+nearY);
      const ssef* pNearZ = (const ssef*)((const char*)node+nearZ);
      const ssef tNearX = (norg.x + ssef(pNearX[0]) + ray.time*pNearX[1]) * rdir.x;
      const ssef tNearY = (norg.y + ssef(pNearY[0]) + ray.time*pNearY[1]) * rdir.y;
      const ssef tNearZ = (norg.z + ssef(pNearZ[0]) + ray.time*pNearZ[1]) * rdir.z;
      const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear);
      const ssef* pFarX = (const ssef*)((const char*)node+farX);
      const ssef* pFarY = (const ssef*)((const char*)node+farY);
      const ssef* pFarZ = (const ssef*)((const char*)node+farZ);
      const ssef tFarX = (norg.x + ssef(pFarX[0]) + ray.time*pFarX[1]) * rdir.x;
      const ssef tFarY = (norg.y + ssef(pFarY[0]) + ray.time*pFarY[1]) * rdir.y;
      const ssef tFarZ = (norg.z + ssef(pFarZ[0]) + ray.time*pFarZ[1]) * rdir.z;
      const ssef tFar = min(tFarX,tFarY,tFarZ,rayFar);
      size_t _hit = movemask(tNear <= tFar);

      /*! push hit nodes onto stack (unsorted: any hit terminates a shadow ray) */
      if (likely(_hit == 0)) continue;
      size_t r = __bsf(_hit); _hit = __btc(_hit,r);
      *stackPtr = node->child[r]; stackPtr++;
      if (likely(_hit == 0)) continue;
      r = __bsf(_hit); _hit = __btc(_hit,r);
      *stackPtr = node->child[r]; stackPtr++;
      if (likely(_hit == 0)) continue;
      r = __bsf(_hit); _hit = __btc(_hit,r);
      *stackPtr = node->child[r]; stackPtr++;
      if (likely(_hit == 0)) continue;
      r = __bsf(_hit); _hit = __btc(_hit,r);
      *stackPtr = node->child[r]; stackPtr++;
    }

    /*! this is a leaf node */
    else
    {
      STAT3(shadow.trav_leaves,1,1,1);
      size_t num; Triangle* tri = (Triangle*) cur->leaf(num);
      for (size_t i=0; i<num; i++) {
        if (TriangleIntersector::occluded(ray,tri[i],bvh->geometry)) {
          /* The ray is occluded: stop the whole traversal. A plain break
           * would only leave this triangle loop and keep walking the
           * remaining stack although the result can no longer change. */
          ray.geomID = 0;
          AVX_ZERO_UPPER();
          return;
        }
      }
    }
  }
  AVX_ZERO_UPPER();
}
__forceinline bool BVH4Intersector4Hybrid<PrimitiveIntersector4>::occluded1(const BVH4* bvh, NodeRef root, size_t k, Ray4& ray, const sse3f& ray_org, const sse3f& ray_dir, const sse3f& ray_rdir, const ssef& ray_tnear, const ssef& ray_tfar) { /*! stack state */ NodeRef stack[stackSizeSingle]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSizeSingle; stack[0] = root; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_dir.x[k] >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray_dir.y[k] >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray_dir.z[k] >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef); /*! load the ray into SIMD registers */ const sse3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const sse3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const sse3f norg = -org, org_rdir(org*rdir); const ssef rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const float d0 = tNear[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const float d1 = tNear[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); assert(cur != BVH4::emptyNode); if (likely(mask == 0)) continue; assert(stackPtr < stackEnd); *stackPtr = cur; stackPtr++; /*! four children are hit */ cur = node->child(3); assert(cur != BVH4::emptyNode); } /*! this is a leaf node */ STAT3(shadow.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); if (PrimitiveIntersector4::occluded(ray,k,prim,num,bvh->geometry)) { ray.geomID[k] = 0; return true; } } return false; }
// Traces a packet of four rays through the kd-tree and records hits in *hit4.
//
// When the four rays do not agree on the sign of every direction component,
// each ray is traced on its own through the single-ray draw() and only the
// prim, u and v fields of *hit4 are filled from the four individual results.
// Otherwise the packet is clipped against _boundingBox and the tree is
// traversed with an explicit stack, intersecting every primitive of each
// visited leaf via scene.intersect().
void kdtreebenthin::draw<1>(scene& scene, ray4* r, hit4* hit4) {
    // Sign masks of the direction components, one bit per ray.
    const unsigned int signx = movemask(r->D().x());
    const unsigned int signy = movemask(r->D().y());
    const unsigned int signz = movemask(r->D().z());

    // The packet traversal needs a common traversal direction, i.e. every
    // mask must be 0x0 or 0xF. The unsigned test (mask - 1) < 14 is true
    // exactly for the mixed masks 1..14 (0 wraps around, 0xF yields 14);
    // in that case fall back to a single ray traversal per ray.
    if (((signx - 1) < 14) || ((signy - 1) < 14) || ((signz - 1) < 14)) {
        hit hits[4];
        for (int i = 0; i < 4; ++i) {
            vec3f d(r->D().x()[i], r->D().y()[i], r->D().z()[i]);
            ray single(r->O(), d);
            hits[i].prim = -1;
            draw(scene, single, hits[i]);
        }
        hit4->prim = ssei(hits[0].prim, hits[1].prim, hits[2].prim, hits[3].prim);
        hit4->u = ssef(hits[0].u, hits[1].u, hits[2].u, hits[3].u);
        hit4->v = ssef(hits[0].v, hits[1].v, hits[2].v, hits[3].v);
        return;
    }

    // Clip the packet against the tree bounds; nothing to do if no ray has a
    // non-empty interval.
    ssef tnear, tfar;
    _boundingBox.clip(*r, tnear, tfar);
    if (movemask(tnear >= tfar) == 0xF)
        return;

    // dir[axis][0] / dir[axis][1]: offsets (relative to getLeft()) of the
    // child visited first / second. All rays share one sign per axis here,
    // so bit 0 of each mask is representative.
    const unsigned int dir[3][2] = {
        { signx & 1 , 1 - (signx & 1) },
        { signy & 1 , 1 - (signy & 1) },
        { signz & 1 , 1 - (signz & 1) } };

    ssef far[MAX_STACK_SIZE];
    ssef near[MAX_STACK_SIZE];
    int nodes[MAX_STACK_SIZE];

    // push dummyNode onto stack which will cause us to exit
    nodes[0] = 0;
    far[0] = BPRAY_INF;
    // The pop code loads near[stackptr] unconditionally, so give the
    // sentinel entry a defined value instead of reading indeterminate data.
    near[0] = BPRAY_INF;
    uint32_t stackptr = 1;
    kdnode* currNode = _nodes + 1;
    int activemask = 0xF;

#if MAILBOX
    // NOTE(review): the incremented value returned by __sync_add_and_fetch
    // is discarded and the shared static is read again below; if several
    // threads trace concurrently they may observe each other's ids - confirm.
    static uint64_t rayid = 0;
    __sync_add_and_fetch(&rayid, 1);
#endif

    while (true) {
        if (!currNode->isLeaf()) {
            const int axis = currNode->getAxis();
            const int front = currNode->getLeft() + dir[axis][0];
            const int back = currNode->getLeft() + dir[axis][1];
            const ssef dist = currNode->getSplit() - r->O()[axis];
            const ssef t = dist * r->rcpD()[axis];

            // No active ray starts before the split plane: back child only.
            currNode = _nodes + back;
            if (!(movemask(tnear <= t) & activemask))
                continue;

            // No active ray reaches the split plane: front child only.
            currNode = _nodes + front;
            if (!(movemask(tfar >= t) & activemask))
                continue;

            // Both children are needed: defer the back child with its
            // interval and continue with the front child clipped at t.
            nodes[stackptr] = back;
            near[stackptr] = max(tnear, t);
            far[stackptr] = tfar;
            tfar = min(tfar, t);
            activemask &= movemask(tnear <= tfar);
            ++stackptr;
        } else {
            const int primidx = currNode->getPrimitiveOffset();
            const int primcount = currNode->getNumPrims();
            for (int i = 0; i != primcount; ++i) {
                const int t = _prims[primidx + i];

                // Prefetch the data of the next primitive of this leaf. Only
                // look ahead while a next primitive exists, so _prims is
                // never read beyond the range that belongs to this leaf.
                if (i + 1 != primcount) {
                    const int t2 = _prims[primidx + i + 1];
                    _mm_prefetch((char*)&scene._accels[t2], _MM_HINT_T0);
                }

#if MAILBOX
                // mailboxing
                if (mbox.find(scene, rayid, t))
                    continue;
#endif
                scene.intersect(t, *r, *hit4);
#if MAILBOX
                mbox.add(scene, rayid, t);
#endif
            }

            // Stop once no ray's current segment ends before its r->tfar.
            if (movemask(tfar < r->tfar) == 0)
                return;

            // Pop the next deferred node together with its interval.
            --stackptr;
            currNode = nodes[stackptr] + _nodes;
            tfar = far[stackptr];
            tnear = near[stackptr];
            activemask = movemask(tnear <= tfar);
        }
    }
}
void BVH4Intersector1<PrimitiveIntersector>::occluded(const BVH4* bvh, Ray& ray) { /*! stack state */ NodeRef stack[stackSize]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSize; stack[0] = bvh->root; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray.dir.x >= 0 ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray.dir.y >= 0 ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray.dir.z >= 0 ? 4*sizeof(ssef) : 5*sizeof(ssef); #if 0 // FIXME: why is this slower /*! load the ray */ Vec3fa ray_org = ray.org; Vec3fa ray_dir = ray.dir; ssef ray_near = max(ray.tnear,FLT_MIN); // we do not support negative tnear values in this kernel due to integer optimizations ssef ray_far = ray.tfar; #if defined(__FIX_RAYS__) const float float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,Vec3fa(-float_range),Vec3fa(+float_range)); ray_dir = clamp(ray_dir,Vec3fa(-float_range),Vec3fa(+float_range)); ray_far = min(ray_far,float(inf)); #endif const Vec3fa ray_rdir = rcp_safe(ray_dir); const sse3f org(ray_org), dir(ray_dir); const sse3f norg(-ray_org), rdir(ray_rdir), org_rdir(ray_org*ray_rdir); #else /*! load the ray into SIMD registers */ const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const ssef ray_near(ray.tnear); ssef ray_far(ray.tfar); #endif /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,ray_near); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); assert(cur != BVH4::emptyNode); if (likely(mask == 0)) continue; assert(stackPtr < stackEnd); *stackPtr = cur; stackPtr++; /*! four children are hit */ cur = node->child(3); assert(cur != BVH4::emptyNode); } /*! this is a leaf node */ STAT3(shadow.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); if (PrimitiveIntersector::occluded(ray,prim,num,bvh->geometry)) { ray.geomID = 0; break; } } AVX_ZERO_UPPER(); }
void BVHNIntersectorKHybrid<N,K,types,robust,PrimitiveIntersectorK,single>::intersect(vint<K>* __restrict__ valid_i, BVH* __restrict__ bvh, RayK<K>& __restrict__ ray, IntersectContext* context) { /* filter out invalid rays */ vbool<K> valid = *valid_i == -1; #if defined(EMBREE_IGNORE_INVALID_RAYS) valid &= ray.valid(); #endif /* verify correct input */ assert(all(valid,ray.valid())); assert(all(valid,ray.tnear >= 0.0f)); assert(!(types & BVH_MB) || all(valid,(ray.time >= 0.0f) & (ray.time <= 1.0f))); /* if the rays belong to different time segments, immediately switch to single ray traversal */ Precalculations pre(valid,ray,bvh->numTimeSteps); size_t valid_bits = movemask(valid); const size_t valid_first = __bsf(valid_bits); if (unlikely((types & BVH_MB) && valid_bits && (movemask(pre.itime() == pre.itime(valid_first)) != valid_bits))) { intersectSingle(valid, bvh, pre, ray, context); AVX_ZERO_UPPER(); return; } /* load ray */ Vec3vfK ray_org = ray.org; Vec3vfK ray_dir = ray.dir; vfloat<K> ray_tnear = max(ray.tnear,0.0f); vfloat<K> ray_tfar = max(ray.tfar ,0.0f); const Vec3vfK rdir = rcp_safe(ray_dir); const Vec3vfK org(ray_org), org_rdir = org * rdir;
/*! Closest-hit traversal of a BVH4 for a single ray whose child boxes are
 *  enlarged by a radius derived from ray.org.w and ray.dir.w.
 *
 *  For every inner node the per-child radius is
 *  abs(ray.org.w + tFar0 * ray.dir.w), where tFar0 is the smallest absolute
 *  far-slab distance of the un-enlarged box. Lower bounds are shrunk and
 *  upper bounds grown by that radius before the slab test. Hit children are
 *  visited closest first; deferred children carry their entry distance (as
 *  raw float bits) and are dropped on pop if that distance exceeds ray.tfar.
 *
 *  \param bvh  hierarchy to traverse, starting at bvh->root
 *  \param ray  ray to intersect; updated by the primitive intersector, and
 *              ray.tfar is re-read after every leaf to shorten the search
 */
void BVH4Intersector1Bezier<PrimitiveIntersector>::intersect(const BVH4* bvh, Ray& ray)
{
  /*! per ray precalculations required by the primitive intersector */
  Precalculations pre(ray);

  /*! deferred nodes with their entry distance; initially just the root */
  StackItemInt32<NodeRef> items[stackSize];
  StackItemInt32<NodeRef>* top = items+1;
  StackItemInt32<NodeRef>* itemsEnd = items+stackSize;
  items[0].ptr  = bvh->root;
  items[0].dist = neg_inf;

  /*! byte offsets of the near and far slab of each axis inside a node */
  const size_t nearX = (ray.dir.x >= 0.0f ? 0 : 1) * sizeof(ssef);
  const size_t nearY = (ray.dir.y >= 0.0f ? 2 : 3) * sizeof(ssef);
  const size_t nearZ = (ray.dir.z >= 0.0f ? 4 : 5) * sizeof(ssef);
  const size_t farX = nearX ^ 16;
  const size_t farY = nearY ^ 16;
  const size_t farZ = nearZ ^ 16;

  /*! broadcast the ray into SIMD registers */
  const Vec3fa ray_rdir = rcp_safe(ray.dir);
  const Vec3fa ray_org_rdir = ray.org*ray_rdir;
  const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z);
  const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
  const ssef ray_near(ray.tnear);
  ssef ray_far(ray.tfar);

  /* one iteration per popped stack entry */
  while (likely(top != items))
  {
    top--;
    NodeRef cur = NodeRef(top->ptr);

    /*! skip entries that lie behind the closest hit found so far */
    if (unlikely(*(float*)&top->dist > ray.tfar))
      continue;

    /* descend until a leaf is reached or all children are missed */
    while (true)
    {
      if (unlikely(cur.isLeaf())) break;
      STAT3(normal.trav_nodes,1,1,1);

      /*! radius at the far side of the un-enlarged child boxes */
      const Node* node = cur.node();
      const char* const slabs = (const char*) node;
      const ssef exitX0 = abs((norg.x + load4f(slabs+farX)) * rdir.x);
      const ssef exitY0 = abs((norg.y + load4f(slabs+farY)) * rdir.y);
      const ssef exitZ0 = abs((norg.z + load4f(slabs+farZ)) * rdir.z);
      const ssef exit0  = min(exitX0,exitY0,exitZ0);
      const ssef radius = abs(ssef(ray.org.w) + exit0 * ssef(ray.dir.w));
      //const ssef radius = zero;
      //PRINT2(tFar0,radius);

      /*! slab distances of the enlarged boxes */
      const ssef loX = (norg.x + node->lower_x - radius) * rdir.x;
      const ssef loY = (norg.y + node->lower_y - radius) * rdir.y;
      const ssef loZ = (norg.z + node->lower_z - radius) * rdir.z;
      const ssef hiX = (norg.x + node->upper_x + radius) * rdir.x;
      const ssef hiY = (norg.y + node->upper_y + radius) * rdir.y;
      const ssef hiZ = (norg.z + node->upper_z + radius) * rdir.z;

      const ssef enterX = min(loX,hiX);
      const ssef enterY = min(loY,hiY);
      const ssef enterZ = min(loZ,hiZ);
      const ssef leaveX = max(loX,hiX);
      const ssef leaveY = max(loY,hiY);
      const ssef leaveZ = max(loZ,hiZ);

      const ssef tNear = max(enterX,enterY,enterZ,ray_near);
      const ssef tFar  = min(leaveX,leaveY,leaveZ,ray_far);
      const sseb hitLanes = tNear <= tFar;
      size_t hits = movemask(hitLanes);

      /*! nothing hit: fetch the next stack entry */
      if (unlikely(hits == 0)) goto nextEntry;

      /*! exactly one child hit: descend into it */
      size_t idx = __bscf(hits);
      if (likely(hits == 0)) {
        cur = node->child(idx);
        //assert(cur != BVH4::emptyNode); // FIXME: enable these assertions again, currently traversing empty children
        continue;
      }

      /*! two children hit: descend into the closer one, defer the other */
      NodeRef c0 = node->child(idx);
      const unsigned int d0 = ((unsigned int*)&tNear)[idx];
      idx = __bscf(hits);
      NodeRef c1 = node->child(idx);
      const unsigned int d1 = ((unsigned int*)&tNear)[idx];
      //assert(c0 != BVH4::emptyNode);
      //assert(c1 != BVH4::emptyNode);
      if (likely(hits == 0)) {
        assert(top < itemsEnd);
        if (d0 < d1) {
          top->ptr = c1; top->dist = d1;
          cur = c0;
        } else {
          top->ptr = c0; top->dist = d0;
          cur = c1;
        }
        top++;
        continue;
      }

      /*! slow path for 3 or 4 hit children: push everything, then sort
       *  the new entries on the stack */
      assert(top < itemsEnd);
      top->ptr = c0; top->dist = d0; top++;
      assert(top < itemsEnd);
      top->ptr = c1; top->dist = d1; top++;

      /*! third child */
      assert(top < itemsEnd);
      idx = __bscf(hits);
      NodeRef c = node->child(idx);
      unsigned int d = ((unsigned int*)&tNear)[idx];
      top->ptr = c; top->dist = d; top++;
      //assert(c != BVH4::emptyNode);
      if (likely(hits == 0)) {
        sort(top[-1],top[-2],top[-3]);
        cur = (NodeRef) top[-1].ptr;
        top--;
        continue;
      }

      /*! fourth child */
      assert(top < itemsEnd);
      idx = __bscf(hits);
      c = node->child(idx);
      d = *(unsigned int*)&tNear[idx];
      top->ptr = c; top->dist = d; top++;
      //assert(c != BVH4::emptyNode);
      sort(top[-1],top[-2],top[-3],top[-4]);
      cur = (NodeRef) top[-1].ptr;
      top--;
    }

    /*! cur is a leaf: intersect its primitives and shorten the ray */
    {
      STAT3(normal.trav_leaves,1,1,1);
      size_t num;
      Primitive* prim = (Primitive*) cur.leaf(num);
      PrimitiveIntersector::intersect(pre,ray,prim,num,bvh->geometry);
      ray_far = ray.tfar;
    }

  nextEntry: ;
  }
}