/*! Intersects a packet of 4 rays with the BVH4 by traversing every
 *  active ray on its own through intersect1().
 *
 *  \param valid_i  mask of the lanes of the packet that are active
 *  \param bvh      hierarchy to traverse, starting at bvh->root
 *  \param ray      ray packet; handed to intersect1() for each active lane
 */
void BVH4Intersector4Single<types,robust,PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray)
{
  /* load ray */
  const sseb valid0 = *valid_i;
  sse3f ray_org = ray.org;
  sse3f ray_dir = ray.dir;
  ssef ray_tnear = ray.tnear, ray_tfar = ray.tfar;
  const sse3f rdir = rcp_safe(ray_dir);

  /* inactive lanes get tnear = +inf and tfar = -inf */
  ray_tnear = select(valid0,ray_tnear,ssef(pos_inf));
  ray_tfar  = select(valid0,ray_tfar ,ssef(neg_inf));
  Precalculations pre(valid0,ray);

  /* compute near/far per ray: per axis one of two offsets (multiples of
     sizeof(ssef)) is selected depending on the sign of the reciprocal
     direction */
  sse3i nearXYZ;
  nearXYZ.x = select(rdir.x >= 0.0f,ssei(0*(int)sizeof(ssef)),ssei(1*(int)sizeof(ssef)));
  nearXYZ.y = select(rdir.y >= 0.0f,ssei(2*(int)sizeof(ssef)),ssei(3*(int)sizeof(ssef)));
  nearXYZ.z = select(rdir.z >= 0.0f,ssei(4*(int)sizeof(ssef)),ssei(5*(int)sizeof(ssef)));

  /* we have no packet implementation for OBB nodes yet */
  size_t bits = movemask(valid0);

  /* visit the set bits from lowest to highest; the bit scan is only
     evaluated while at least one bit is left */
  while (bits != 0) {
    const size_t i = __bsf(bits);
    bits = __btc(bits,i);
    intersect1(bvh, bvh->root, i, pre, ray, ray_org, ray_dir, rdir, ray_tnear, ray_tfar, nearXYZ);
  }
  AVX_ZERO_UPPER();
}
/*! Intersects a packet of 4 rays with the object referenced by an instance.
 *
 *  The ray is transformed with instance->world2local, intersected through
 *  instance->object->intersect4() and then restored to its original origin
 *  and direction. Lanes whose geomID is still -1 afterwards get the geomID
 *  and instID they had on entry back.
 *
 *  \param valid     lane mask forwarded to intersect4()
 *  \param instance  instance providing transformation, id and object
 *  \param ray       ray packet, updated in place
 *  \param item      not used by this function
 */
void FastInstanceIntersector4::intersect(sseb* valid, const UserGeometryScene::Instance* instance, Ray4& ray, size_t item)
{
  /* remember the incoming ray and the IDs stored in it */
  const sse3f savedOrg = ray.org;
  const sse3f savedDir = ray.dir;
  const ssei savedGeomID = ray.geomID;
  const ssei savedInstID = ray.instID;

  /* transform the ray and mark every lane as "nothing hit yet" */
  const AffineSpace3faSSE world2local(instance->world2local);
  ray.org = xfmPoint (world2local,savedOrg);
  ray.dir = xfmVector(world2local,savedDir);
  ray.geomID = -1;
  ray.instID = instance->id;

  instance->object->intersect4(valid,(RTCRay4&)ray);

  /* restore origin and direction */
  ray.org = savedOrg;
  ray.dir = savedDir;

  /* lanes without a hit keep the IDs they came in with */
  const sseb missed = ray.geomID == ssei(-1);
  ray.geomID = select(missed,savedGeomID,ray.geomID);
  ray.instID = select(missed,savedInstID,ray.instID);
}
/**
 * Traverses the kd-tree with a packet of four rays and intersects the
 * primitives of every leaf that is reached.
 *
 * If the direction signs of the four rays differ on any axis, the packet is
 * split and every ray is traced through the single-ray draw() overload.
 *
 * @param scene  scene that owns the primitives and performs the intersection
 * @param r      ray packet
 * @param hit4   hit record of the packet, updated by scene.intersect()
 */
void kdtreebenthin::draw<1>(scene& scene, ray4* r, hit4* hit4)
{
    unsigned int signx = movemask(r->D().x());
    unsigned int signy = movemask(r->D().y());
    unsigned int signz = movemask(r->D().z());

    // If the traversal direction is not the same for all rays we do a
    // single ray traversal. (mask - 1) < 14 in unsigned arithmetic holds
    // exactly for masks 1..14, i.e. when the sign mask is neither 0 nor 0xF.
    if (((signx - 1) < 14)        // signs of x differ between rays
        || ((signy - 1) < 14)     // signs of y differ between rays
        || ((signz - 1) < 14)) {  // signs of z differ between rays
        hit hit[4];
        for (int i = 0; i < 4; ++i) {
            vec3f d(r->D().x()[i], r->D().y()[i], r->D().z()[i]);
            ray ray(r->O(), d);
            hit[i].prim = -1;
            draw(scene, ray, hit[i]);
        }
        hit4->prim = ssei(hit[0].prim, hit[1].prim, hit[2].prim, hit[3].prim);
        hit4->u = ssef(hit[0].u, hit[1].u, hit[2].u, hit[3].u);
        hit4->v = ssef(hit[0].v, hit[1].v, hit[2].v, hit[3].v);
        return;
    }

    // Clip the packet against the bounding box; nothing to do if no ray
    // has a non-empty interval.
    ssef tnear, tfar;
    _boundingBox.clip(*r, tnear, tfar);
    if (movemask(tnear >= tfar) == 0xF)
        return;

    // Per axis: offset of the child visited first / second, derived from
    // the sign bit of ray 0 (all rays agree at this point).
    const unsigned int dir[3][2] = {
        { signx & 1 , 1 - (signx & 1) },
        { signy & 1 , 1 - (signy & 1) },
        { signz & 1 , 1 - (signz & 1) } };

    ssef far[MAX_STACK_SIZE];
    ssef near[MAX_STACK_SIZE];
    int nodes[MAX_STACK_SIZE];

    // push dummyNode onto stack which will cause us to exit
    nodes[0] = 0;
    far[0] = BPRAY_INF;
    near[0] = BPRAY_INF;  // read back when the dummy entry is popped; keep it defined
    uint32_t stackptr = 1;
    kdnode* currNode = _nodes + 1;
    int activemask = 0xF;

#if MAILBOX
    // The id of this packet is the value returned by the atomic increment.
    // Reading the shared counter again afterwards could observe increments
    // made by other threads in the meantime.
    static uint64_t rayCounter = 0;
    uint64_t rayid = __sync_add_and_fetch(&rayCounter, 1);
#endif

    while (true) {
        if (!currNode->isLeaf()) {
            const int axis = currNode->getAxis();
            const int front = currNode->getLeft() + dir[axis][0];
            const int back = currNode->getLeft() + dir[axis][1];
            const ssef dist = currNode->getSplit() - r->O()[axis];
            const ssef t = dist * r->rcpD()[axis];

            // no active ray has tnear <= t: only the back child is visited
            currNode = _nodes + back;
            if (!(movemask(tnear <= t) & activemask))
                continue;

            // no active ray has tfar >= t: only the front child is visited
            currNode = _nodes + front;
            if (!(movemask(tfar >= t) & activemask))
                continue;

            // both children are needed: postpone the back child
            nodes[stackptr] = back;
            near[stackptr] = max(tnear, t);
            far[stackptr] = tfar;
            tfar = min(tfar, t);
            activemask &= movemask(tnear <= tfar);
            ++stackptr;
        } else {
            int primidx = currNode->getPrimitiveOffset();
            int primcount = currNode->getNumPrims();
            for (int i = 0; i != primcount; ++i) {
                int t = _prims[primidx + i];

                // prefetch the next primitive of this leaf; the last
                // primitive has no successor, so nothing is read past the
                // leaf's index range
                if (i + 1 != primcount) {
                    int t2 = _prims[primidx + i + 1];
                    _mm_prefetch((char*)&scene._accels[t2], _MM_HINT_T0);
                }

#if MAILBOX
                // mailboxing
                if (mbox.find(scene, rayid, t))
                    continue;
#endif
                scene.intersect(t, *r, *hit4);
#if MAILBOX
                mbox.add(scene, rayid, t);
#endif
            }

            // stop once no ray has tfar < r->tfar any more
            if (movemask(tfar < r->tfar) == 0)
                return;

            --stackptr;
            currNode = nodes[stackptr] + _nodes;
            tfar = far[stackptr];
            tnear = near[stackptr];
            activemask = movemask(tnear <= tfar);
        }
    }
}
/*! Packet traversal of a BVH4 for 4 rays that can fall back to single-ray
 *  traversal when few lanes are active.
 *
 *  Lanes not set in *valid_i get tnear = +inf and tfar = -inf. The stack
 *  starts with BVH4::invalidNode as bottom entry followed by bvh->root;
 *  popping BVH4::invalidNode ends the outer loop. A popped entry is dropped
 *  when its stored distance is not below ray_tfar in any lane. Where the
 *  preprocessor guard allows it, an entry with at most SWITCH_THRESHOLD
 *  active lanes is handed lane by lane to
 *  BVH4Intersector4Single::intersect1(), after which ray_tfar is clamped
 *  to ray.tfar. Otherwise the inner loop walks down normal nodes
 *  (types & 0x1) and motion blur nodes (types & 0x10): every child hit by
 *  some lane either becomes the current node (if it is closer than the
 *  current one in any lane) or is pushed onto the stack.
 *
 *  NOTE(review): the text visible here stops right after the inner
 *  traversal loop; leaf handling and the closing of the outer loop and of
 *  the function are not shown - confirm against the complete file. */
void BVH4Intersector4Hybrid<types,robust,PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray) { /* load ray */ const sseb valid0 = *valid_i; sse3f ray_org = ray.org; sse3f ray_dir = ray.dir; ssef ray_tnear = ray.tnear, ray_tfar = ray.tfar; const sse3f rdir = rcp_safe(ray_dir); const sse3f org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid0,ray_tnear,ssef(pos_inf)); ray_tfar = select(valid0,ray_tfar ,ssef(neg_inf)); const ssef inf = ssef(pos_inf); Precalculations pre(valid0,ray); /* compute near/far per ray */ sse3i nearXYZ; nearXYZ.x = select(rdir.x >= 0.0f,ssei(0*(int)sizeof(ssef)),ssei(1*(int)sizeof(ssef))); nearXYZ.y = select(rdir.y >= 0.0f,ssei(2*(int)sizeof(ssef)),ssei(3*(int)sizeof(ssef))); nearXYZ.z = select(rdir.z >= 0.0f,ssei(4*(int)sizeof(ssef)),ssei(5*(int)sizeof(ssef))); /* allocate stack and push root node */ ssef stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; ssef* __restrict__ sptr_near = stack_near + 2; while (1) pop: { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef cur = *sptr_node; if (unlikely(cur == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ ssef curDist = *sptr_near; const sseb active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { BVH4Intersector4Single<types,robust,PrimitiveIntersector4>::intersect1(bvh, cur, i, pre, ray, ray_org, ray_dir, rdir, ray_tnear, ray_tfar, nearXYZ); } ray_tfar = min(ray_tfar,ray.tfar); continue; } #endif 
while (1) { /* process normal nodes */ if (likely((types & 0x1) && cur.isNode())) { const sseb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),4); const Node* __restrict__ const node = cur.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; ssef lnearP; const sseb lhit = node->intersect<robust>(i,org,rdir,org_rdir,ray_tnear,ray_tfar,lnearP); /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4::emptyNode); const ssef childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } #if SWITCH_DURING_DOWN_TRAVERSAL == 1 // seems to be the best place for testing utilization if (unlikely(popcnt(ray_tfar > curDist) <= SWITCH_THRESHOLD)) { *sptr_node++ = cur; *sptr_near++ = curDist; goto pop; } #endif } /* process motion blur nodes */ else if (likely((types & 0x10) && cur.isNodeMB())) { const sseb valid_node = ray_tfar > curDist; STAT3(normal.trav_nodes,1,popcnt(valid_node),4); const BVH4::NodeMB* __restrict__ const node = cur.nodeMB(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; cur = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<BVH4::N; i++) { const NodeRef child = node->child(i); if (unlikely(child == BVH4::emptyNode)) break; ssef lnearP; const sseb lhit = node->intersect(i,org,rdir,org_rdir,ray_tnear,ray_tfar,ray.time,lnearP); /* if we 
hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); assert(child != BVH4::emptyNode); const ssef childDist = select(lhit,lnearP,inf); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = cur; *(sptr_near-1) = curDist; curDist = childDist; cur = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } #if SWITCH_DURING_DOWN_TRAVERSAL == 1 // seems to be the best place for testing utilization if (unlikely(popcnt(ray_tfar > curDist) <= SWITCH_THRESHOLD)) { *sptr_node++ = cur; *sptr_near++ = curDist; goto pop; } #endif } else break; }
/*! Conversion to ssei: wraps the result of _mm256_castsi256_si128 applied
 *  to the m256 member (the lower 128 bits of the 256 bit register). */
INLINE operator ssei () const
{
  const __m128i lowerHalf = _mm256_castsi256_si128(m256);
  return ssei(lowerHalf);
}
bool BVH2Traverser::occluded(const Ray& ray) const { /*! stack state */ int stackPtr = 0; //!< current stack pointer int stack[1+BVH2<Triangle4>::maxDepth]; //!< stack of nodes that still need to get traversed int cur = bvh->root; //!< in cur we track the ID of the current node /*! precomputed shuffles, to switch lower and upper bounds depending on ray direction */ const ssei identity = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); const ssei swap = _mm_set_epi8( 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); const ssei shuffleX = ray.dir.x >= 0 ? identity : swap; const ssei shuffleY = ray.dir.y >= 0 ? identity : swap; const ssei shuffleZ = ray.dir.z >= 0 ? identity : swap; /*! load the ray into SIMD registers */ const ssei pn = ssei(0x00000000,0x00000000,0x80000000,0x80000000); const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z); const sse3f rdir = sse3f(ssef(ray.rdir.x) ^ pn, ssef(ray.rdir.y) ^ pn, ssef(ray.rdir.z) ^ pn); ssef nearFar(ray.near, ray.near, -ray.far, -ray.far); BVH2<Triangle4>::Node* nodes = bvh->nodes; while (true) { /*! this is an inner node */ while (__builtin_expect(cur >= 0, true)) { /*! Single ray intersection with box of both children. See bvh2.h for node layout. */ const BVH2<Triangle4>::Node& node = bvh->node(nodes,cur); const ssef tNearFarX = (shuffle8(node.lower_upper_x,shuffleX) + norg.x) * rdir.x; const ssef tNearFarY = (shuffle8(node.lower_upper_y,shuffleY) + norg.y) * rdir.y; const ssef tNearFarZ = (shuffle8(node.lower_upper_z,shuffleZ) + norg.z) * rdir.z; const ssef tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,nearFar) ^ pn; const sseb lrhit = tNearFar <= shuffle8(tNearFar,swap); /*! if two children hit, push far node onto stack and continue with closer node */ if (__builtin_expect(lrhit[0] != 0 && lrhit[1] != 0, true)) { if (tNearFar[0] < tNearFar[1]) { stack[stackPtr++] = node.child[1]; cur = node.child[0]; } else { stack[stackPtr++] = node.child[0]; cur = node.child[1]; } } /*! 
if one child hit, continue with that child */ else { if (lrhit[0] != 0) cur = node.child[0]; else if (lrhit[1] != 0) cur = node.child[1]; else goto pop_node; } } /*! leaf node, intersect all triangles */ { cur ^= 0x80000000; const size_t ofs = size_t(cur) >> 5; const size_t num = size_t(cur) & 0x1F; for (size_t i=ofs; i<ofs+num; i++) if (bvh->triangles[i].occluded(ray)) return true; } /*! pop next node from stack */ pop_node: if (__builtin_expect(stackPtr == 0, false)) break; cur = stack[--stackPtr]; } return false; }
/*! Recursively applies tree rotations below a node.
 *
 *  The children are rotated first. Then, for every non-leaf child, the
 *  cost of exchanging each of the parent's children with each of that
 *  child's children is evaluated, and the single best exchange is
 *  performed if it lowers the cost.
 *
 *  \param nodeID  node to process; leaves are returned immediately
 *  \param depth   depth of nodeID, used for the maxDepth constraint
 *  \return        0 for a leaf, otherwise 1 + the largest depth recorded
 *                 for the children (conservative after a rotation)
 */
size_t BVH4MB::rotate(Base* nodeID, size_t depth)
{
  /*! leaves cannot be rotated */
  if (nodeID->isLeaf()) return 0;
  Node* parent = nodeID->node();

  /*! process the subtrees first and record the depth each one reports */
  ssei cdepth;
  for (size_t c=0; c<4; c++)
    cdepth[c] = (int)rotate(parent->child[c],depth+1);

  /* area term of the box of every child */
  const ssef extentX = parent->upper_x-parent->lower_x;
  const ssef extentY = parent->upper_y-parent->lower_y;
  const ssef extentZ = parent->upper_z-parent->lower_z;
  const ssef childArea = extentX*(extentY + extentZ) + extentY*extentZ;

  /*! transpose the bounds of the parent to get one box per child */
  ssef plower0,plower1,plower2,plower3;
  transpose(parent->lower_x,parent->lower_y,parent->lower_z,ssef(zero),plower0,plower1,plower2,plower3);
  ssef pupper0,pupper1,pupper2,pupper3;
  transpose(parent->upper_x,parent->upper_y,parent->upper_z,ssef(zero),pupper0,pupper1,pupper2,pupper3);
  const BBox<ssef> other[4] = {
    BBox<ssef>(plower0,pupper0), BBox<ssef>(plower1,pupper1),
    BBox<ssef>(plower2,pupper2), BBox<ssef>(plower3,pupper3)
  };

  /*! Search for the best rotation: a "target" slot inside one child is
      exchanged with an "other" child of the parent. */
  float bestCost = pos_inf;
  int bestChild = -1, bestTarget = -1, bestOther = -1;
  for (size_t c=0; c<4; c++)
  {
    /*! we cannot descend into leaves */
    if (parent->child[c]->isLeaf()) continue;
    Node* child = parent->child[c]->node();

    /*! transpose the bounds of the child to get one box per grandchild */
    ssef clower0,clower1,clower2,clower3;
    transpose(child->lower_x,child->lower_y,child->lower_z,ssef(zero),clower0,clower1,clower2,clower3);
    ssef cupper0,cupper1,cupper2,cupper3;
    transpose(child->upper_x,child->upper_y,child->upper_z,ssef(zero),cupper0,cupper1,cupper2,cupper3);
    const BBox<ssef> target[4] = {
      BBox<ssef>(clower0,cupper0), BBox<ssef>(clower1,cupper1),
      BBox<ssef>(clower2,cupper2), BBox<ssef>(clower3,cupper3)
    };

    /*! for every other[o]: try it at each target position and keep the
        cheapest position together with its cost */
    float lowest[4];
    int pos[4];
    for (size_t o=0; o<4; o++)
    {
      const float at0 = halfArea3f(merge(other[o] ,target[1],target[2],target[3]));
      const float at1 = halfArea3f(merge(target[0],other[o] ,target[2],target[3]));
      const float at2 = halfArea3f(merge(target[0],target[1],other[o] ,target[3]));
      const float at3 = halfArea3f(merge(target[0],target[1],target[2],other[o] ));
      const ssef costs = ssef(at0,at1,at2,at3);
      const ssef minimum = vreduce_min(costs);
      pos[o] = (int)__bsf(movemask(minimum == costs));
      lowest[o] = extract<0>(minimum);
    }

    /*! pick the cheapest other child among those that respect the depth limit */
    ssef otherCost = ssef(lowest[0],lowest[1],lowest[2],lowest[3]);
    sseb valid = ssei(int(depth+1))+cdepth <= ssei(maxDepth);
    if (none(valid)) continue;

    size_t n = select_min(valid,otherCost);
    float cost = otherCost[n]-childArea[c]; //< growing the original child bound is bad, shrinking it is good

    /*! a candidate must beat the best one so far and must not exchange a node with itself */
    if (cost < bestCost && n != c) {
      bestCost = cost;
      bestChild = (int)c;
      bestOther = (int)n;
      bestTarget = pos[n];
    }
  }

  /*! no exchange improves the cost: leave the node as it is */
  if (bestCost >= 0) return 1+reduce_max(cdepth);

  /*! perform the best rotation and refit the bounds stored for bestChild */
  Node* child = parent->child[bestChild]->node();
  swap(parent,bestOther,child,bestTarget);
  parent->lower_x[bestChild] = reduce_min(child->lower_x);
  parent->lower_y[bestChild] = reduce_min(child->lower_y);
  parent->lower_z[bestChild] = reduce_min(child->lower_z);
  parent->upper_x[bestChild] = reduce_max(child->upper_x);
  parent->upper_y[bestChild] = reduce_max(child->upper_y);
  parent->upper_z[bestChild] = reduce_max(child->upper_z);
  parent->lower_dx[bestChild] = reduce_min(child->lower_dx);
  parent->lower_dy[bestChild] = reduce_min(child->lower_dy);
  parent->lower_dz[bestChild] = reduce_min(child->lower_dz);
  parent->upper_dx[bestChild] = reduce_max(child->upper_dx);
  parent->upper_dy[bestChild] = reduce_max(child->upper_dy);
  parent->upper_dz[bestChild] = reduce_max(child->upper_dz);

  /*! The depth returned here is conservative, as the node that moved up
   *  could have been on the critical path. */
  cdepth[bestOther]++; // bestOther moved one level down
  return 1+reduce_max(cdepth);
}
/*! Finds the closest hit of a single ray with the BVH2.
 *
 *  Inner nodes are traversed with a stack whose entries remember the entry
 *  distance of the postponed child, so entries farther away than the
 *  current hit are skipped when popped. After every leaf the far value of
 *  the ray interval is shortened to hit.t.
 *
 *  \param ray  ray to trace; dir, rdir, org, near and far are read
 *  \param hit  hit record; hit.t is first clamped to ray.far and then
 *              updated by TriangleIntersector::intersect()
 */
void BVH2Intersector<TriangleIntersector>::intersect(const Ray& ray, Hit& hit) const
{
  AVX_ZERO_UPPER();
  STAT3(normal.travs,1,1,1);

  struct StackItem {
    Base* ptr;   //!< node pointer
    float dist;  //!< distance of node
  };

  /*! stack state */
  StackItem stack[1+BVH2::maxDepth];  //!< stack of nodes that still need to get traversed
  StackItem* stackPtr = stack;        //!< current stack pointer
  Base* cur = bvh->root;              //!< in cur we track the ID of the current node

  /*! precomputed shuffles, to switch lower and upper bounds depending on ray direction */
  const ssei identity = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
  const ssei swap     = _mm_set_epi8( 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
  const ssei shuffleX = ray.dir.x >= 0 ? identity : swap;
  const ssei shuffleY = ray.dir.y >= 0 ? identity : swap;
  const ssei shuffleZ = ray.dir.z >= 0 ? identity : swap;

  /*! load the ray into SIMD registers */
  const ssei pn = ssei(0x00000000,0x00000000,0x80000000,0x80000000);
  const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const sse3f rdir = sse3f(ssef(ray.rdir.x) ^ pn, ssef(ray.rdir.y) ^ pn, ssef(ray.rdir.z) ^ pn);
  ssef nearFar(ray.near, ray.near, -ray.far, -ray.far);
  hit.t = min(hit.t,ray.far);

  while (true)
  {
    /*! downtraversal loop */
    while (likely(cur->isNode()))
    {
      /*! single ray intersection with box of both children. */
      const Node* node = cur->node();
      const ssef tNearFarX = (shuffle8(node->lower_upper_x,shuffleX) + norg.x) * rdir.x;
      const ssef tNearFarY = (shuffle8(node->lower_upper_y,shuffleY) + norg.y) * rdir.y;
      const ssef tNearFarZ = (shuffle8(node->lower_upper_z,shuffleZ) + norg.z) * rdir.z;
      const ssef tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,nearFar) ^ pn;
      const sseb lrhit = tNearFar <= shuffle8(tNearFar,swap);

      /*! if two children hit, push far node onto stack and continue with closer node */
      if (likely(lrhit[0] != 0 && lrhit[1] != 0)) {
        if (likely(tNearFar[0] < tNearFar[1])) {
          stackPtr->ptr = node->child[1];
          stackPtr->dist = tNearFar[1];
          cur = node->child[0];
          stackPtr++;
        }
        else {
          stackPtr->ptr = node->child[0];
          stackPtr->dist = tNearFar[0];
          cur = node->child[1];
          stackPtr++;
        }
      }

      /*! if one child hit, continue with that child */
      else {
        if      (likely(lrhit[0] != 0)) cur = node->child[0];
        else if (likely(lrhit[1] != 0)) cur = node->child[1];
        else goto pop_node;
      }
    }

    /*! leaf node, intersect all triangles */
    {
      /* this is the closest-hit query, so the leaf is counted with the
         "normal" statistics, matching normal.travs at the top */
      STAT3(normal.trav_leaves,1,1,1);
      size_t num; Triangle* tri = (Triangle*) cur->leaf(num);
      for (size_t i=0; i<num; i++)
        TriangleIntersector::intersect(ray,hit,tri[i],bvh->vertices);

      /* keep the near lanes, replace the far lanes by the new -hit.t */
      nearFar = shuffle<0,1,2,3>(nearFar,-hit.t);
    }

    /*! pop next node from stack */
  pop_node:
    if (unlikely(stackPtr == stack)) break;
    --stackPtr;
    cur = stackPtr->ptr;

    /* skip entries that start behind the closest hit found so far */
    if (unlikely(stackPtr->dist > hit.t)) goto pop_node;
  }
  AVX_ZERO_UPPER();
}
/*! Tests whether anything stored in the BVH2 occludes the ray.
 *
 *  Traversal stops at the first triangle for which
 *  TriangleIntersector::occluded() returns true. When both children of a
 *  node are hit, child[1] is visited next and child[0] is postponed.
 *
 *  \param ray  ray to test; dir, rdir, org, near and far are read
 *  \return     true if an occluding triangle was found, else false
 */
bool BVH2Intersector<TriangleIntersector>::occluded(const Ray& ray) const
{
  AVX_ZERO_UPPER();

  /*! nodes that still have to be visited */
  Base* pending[1+BVH2::maxDepth];
  Base** top = pending;

  /*! node currently being processed */
  Base* cur = bvh->root;

  /*! byte shuffles: keepHalves leaves a register untouched, swapHalves
      exchanges its lower and upper 8 bytes; picked per axis from the sign
      of the ray direction to switch lower and upper bounds */
  const ssei keepHalves = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
  const ssei swapHalves = _mm_set_epi8( 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
  const ssei shuffleX = ray.dir.x >= 0 ? keepHalves : swapHalves;
  const ssei shuffleY = ray.dir.y >= 0 ? keepHalves : swapHalves;
  const ssei shuffleZ = ray.dir.z >= 0 ? keepHalves : swapHalves;

  /*! ray in SIMD registers; signFlip toggles the sign bit of lanes 2 and 3 */
  const ssei signFlip = ssei(0x00000000,0x00000000,0x80000000,0x80000000);
  const sse3f negOrg(-ray.org.x,-ray.org.y,-ray.org.z);
  const sse3f rdir = sse3f(ssef(ray.rdir.x) ^ signFlip, ssef(ray.rdir.y) ^ signFlip, ssef(ray.rdir.z) ^ signFlip);
  const ssef nearFar(ray.near, ray.near, -ray.far, -ray.far);

  while (true)
  {
    /*! walk down while cur is an inner node */
    while (likely(cur->isNode()))
    {
      /*! slab test of the ray against the boxes of both children */
      const Node* node = cur->node();
      const ssef slabX = (shuffle8(node->lower_upper_x,shuffleX) + negOrg.x) * rdir.x;
      const ssef slabY = (shuffle8(node->lower_upper_y,shuffleY) + negOrg.y) * rdir.y;
      const ssef slabZ = (shuffle8(node->lower_upper_z,shuffleZ) + negOrg.z) * rdir.z;
      const ssef tNearFar = max(slabX,slabY,slabZ,nearFar) ^ signFlip;
      const sseb lrhit = tNearFar <= shuffle8(tNearFar,swapHalves);
      const bool hitChild0 = lrhit[0] != 0;
      const bool hitChild1 = lrhit[1] != 0;

      if (likely(hitChild0 && hitChild1)) {
        /*! both hit: postpone child 0, go on with child 1 */
        *top++ = node->child[0];
        cur = node->child[1];
      }
      else if (hitChild0) cur = node->child[0];
      else if (hitChild1) cur = node->child[1];
      else goto take_next;
    }

    /*! leaf: test every triangle, stop at the first occluder */
    {
      STAT3(shadow.trav_leaves,1,1,1);
      size_t num;
      Triangle* tri = (Triangle*) cur->leaf(num);
      for (size_t i=0; i<num; i++) {
        if (TriangleIntersector::occluded(ray,tri[i],bvh->vertices)) {
          AVX_ZERO_UPPER();
          return true;
        }
      }
    }

    /*! continue with the most recently postponed node, if any */
  take_next:
    if (unlikely(top == pending)) break;
    cur = *(--top);
  }
  AVX_ZERO_UPPER();
  return false;
}