// We create packets and directly fill each zBuffer tile. Note that we // really store t values void TaskRayTraceHiZ::run(size_t taskID) { const uint32 taskX = taskID % this->taskXNum; const uint32 taskY = taskID / this->taskXNum; const uint32 startX = taskX * this->width; const uint32 startY = taskY * this->height; const uint32 endX = startX + this->width; const uint32 endY = startY + this->height; uint32 tileY = startY / HiZ::Tile::height; for (uint32 y = startY; y < endY; y += RayPacket::height, ++tileY) { uint32 tileX = startX / HiZ::Tile::width; for (uint32 x = startX; x < endX; x += RayPacket::width, ++tileX) { RayPacket pckt; PacketHit hit; gen.generate(pckt, x, y); intersector->traverse(pckt, hit); ssef zmin(inf), zmax(neg_inf); const uint32 tileID = tileX + tileY * zBuffer->tileXNum; PF_ASSERT(tileID < zBuffer->tileNum); HiZ::Tile &tile = zBuffer->tiles[tileID]; for (uint32 chunkID = 0; chunkID < HiZ::Tile::chunkNum; ++chunkID) { //const ssef t = hit.t[chunkID]; const ssef t = hit.t[chunkID] *dot(sse3f(view.x,view.y,view.z), pckt.dir[chunkID]); tile.z[chunkID] = t; zmin = min(zmin, t); zmax = max(zmax, t); } tile.zmin = reduce_min(zmin)[0]; tile.zmax = reduce_max(zmax)[0]; } } }
/*! Builds the frustum state from a camera and a HiZ buffer.
 *
 *  \param cam camera providing org, view, xAxis, zAxis, fov and ratio
 *  \param hiz HiZ buffer; kept in the member of the same name and used here
 *             for its tile counts (tileXNum / tileYNum)
 */
PerspectiveFrustum::PerspectiveFrustum(const RTCamera &cam, Ref<HiZ> hiz) : hiz(hiz)
{
  // Camera origin and view vector, once as a single (x, y, z, 0) register ...
  org_aos = ssef(cam.org.x, cam.org.y, cam.org.z, 0.f);
  view_aos = ssef(cam.view.x, cam.view.y, cam.view.z, 0.f);

  // ... and once as sse3f
  org = sse3f(cam.org.x, cam.org.y, cam.org.z);
  view = sse3f(cam.view.x, cam.view.y, cam.view.z);

  // Camera axes, stored normalized
  xAxis = sse3f(cam.xAxis.x, cam.xAxis.y, cam.xAxis.z);
  xAxis = normalize(xAxis);
  zAxis = sse3f(cam.zAxis.x, cam.zAxis.y, cam.zAxis.z);
  zAxis = normalize(zAxis);

  // Sine and inverse tangent of the maximum angles. The x values are derived
  // from the y values through the camera ratio.
  // NOTE(review): fov * pi / 360 is the half angle in radians only if cam.fov
  // is expressed in degrees - confirm against RTCamera.
  yMaxSinAngle = sin(cam.fov * float(pi) / 360.f);
  xMaxSinAngle = cam.ratio * yMaxSinAngle;
  yMaxInvTanAngle = 1.f / tan(cam.fov * float(pi) / 360.f);
  xMaxInvTanAngle = yMaxInvTanAngle / cam.ratio;

  // Half the tile counts, laid out as (x, x, y, y)
  const float halfTilesX = float(hiz->tileXNum) * 0.5f;
  const float halfTilesY = float(hiz->tileYNum) * 0.5f;
  windowing = ssef(halfTilesX, halfTilesX, halfTilesY, halfTilesY);

  // Largest valid tile index per axis, same (x, x, y, y) layout
  const float lastTileX = float(hiz->tileXNum-1);
  const float lastTileY = float(hiz->tileYNum-1);
  hizExtent = ssef(lastTileX, lastTileX, lastTileY, lastTileY);
}
/*! Occlusion query for a packet of 4 rays against a BVH4, using packet
 *  traversal that falls back to single ray traversal (occluded1) when few
 *  lanes are still active.
 *
 *  \param valid_i mask of the lanes to process; every other lane starts out
 *                 as terminated
 *  \param bvh     hierarchy to traverse, starting at bvh->root
 *  \param ray     ray packet; at the end ray.geomID is written through
 *                 store4i(valid & terminated, &ray.geomID, 0)
 *                 NOTE(review): presumably a masked store of 0 into the
 *                 occluded lanes - confirm store4i semantics.
 */
void BVH4Intersector4Hybrid<PrimitiveIntersector4>::occluded(sseb* valid_i, BVH4* bvh, Ray4& ray) {
  /* load ray */
  const sseb valid = *valid_i;
  sseb terminated = !valid; /* lanes that are not requested never need work */
  sse3f ray_org = ray.org, ray_dir = ray.dir;
  ssef ray_tnear = ray.tnear, ray_tfar = ray.tfar;
#if defined(__FIX_RAYS__)
  /* optional sanitizing of the ray: clamp origin and direction to a finite
     range and bound tnear / tfar */
  const ssef float_range = 0.1f*FLT_MAX;
  ray_org = clamp(ray_org,sse3f(-float_range),sse3f(+float_range));
  ray_dir = clamp(ray_dir,sse3f(-float_range),sse3f(+float_range));
  ray_tnear = max(ray_tnear,FLT_MIN);
  ray_tfar = min(ray_tfar,float(inf));
#endif
  const sse3f rdir = rcp_safe(ray_dir);
  const sse3f org(ray_org), org_rdir = org * rdir; /* org_rdir is only used by the msub path below */
  /* lanes outside of the valid mask get tnear = +inf and tfar = -inf
     (assuming select(mask,a,b) picks a where the mask is set) */
  ray_tnear = select(valid,ray_tnear,ssef(pos_inf));
  ray_tfar = select(valid,ray_tfar ,ssef(neg_inf));
  const ssef inf = ssef(pos_inf);

  /* allocate stack and push root node */
  ssef stack_near[stackSizeChunk];
  NodeRef stack_node[stackSizeChunk];
  /* slot 0 is a sentinel: popping BVH4::invalidNode ends the traversal */
  stack_node[0] = BVH4::invalidNode;
  stack_near[0] = inf;
  stack_node[1] = bvh->root;
  stack_near[1] = ray_tnear;
  NodeRef* stackEnd = stack_node+stackSizeChunk; /* only read by the overflow assert below */
  NodeRef* __restrict__ sptr_node = stack_node + 2;
  ssef* __restrict__ sptr_near = stack_near + 2;

  while (1) {
    /* pop next node from stack */
    assert(sptr_node > stack_node);
    sptr_node--;
    sptr_near--;
    NodeRef curNode = *sptr_node;
    if (unlikely(curNode == BVH4::invalidNode)) {
      assert(sptr_node == stack_node);
      break;
    }

    /* cull node if behind closest hit point */
    ssef curDist = *sptr_near;
    const sseb active = curDist < ray_tfar;
    if (unlikely(none(active))) continue;

    /* switch to single ray traversal */
    /* (this path is compiled out when __WIN32__ is defined without __X86_64__) */
#if !defined(__WIN32__) || defined(__X86_64__)
    size_t bits = movemask(active);
    if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) {
      /* visit every set bit of the active mask: each such lane traverses the
         subtree below curNode on its own and is flagged when it is occluded */
      for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) {
        if (occluded1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar)) terminated[i] = -1;
      }
      if (all(terminated)) break;
      /* terminated lanes get tfar = -inf so that they fail later distance tests */
      ray_tfar = select(terminated,ssef(neg_inf),ray_tfar);
      continue;
    }
#endif

    while (1) {
      /* test if this is a leaf node */
      if (unlikely(curNode.isLeaf())) break;

      const sseb valid_node = ray_tfar > curDist; /* only feeds the statistics macro */
      STAT3(shadow.trav_nodes,1,popcnt(valid_node),4);
      const Node* __restrict__ const node = curNode.node();

      /* pop of next node */
      /* the popped entry becomes the tentative next node; hit children of
         'node' either replace it or get pushed below */
      assert(sptr_node > stack_node);
      sptr_node--;
      sptr_near--;
      curNode = *sptr_node;
      curDist = *sptr_near;

#pragma unroll(4)
      for (unsigned i=0; i<4; i++) {
        const NodeRef child = node->children[i];
        /* an empty slot ends the child list */
        if (unlikely(child == BVH4::emptyNode)) break;

        /* distances to the six bounding planes of child i, per lane */
#if defined(__AVX2__)
        const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
        const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
        const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
        const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
        const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
        const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
#else
        const ssef lclipMinX = (node->lower_x[i] - org.x) * rdir.x;
        const ssef lclipMinY = (node->lower_y[i] - org.y) * rdir.y;
        const ssef lclipMinZ = (node->lower_z[i] - org.z) * rdir.z;
        const ssef lclipMaxX = (node->upper_x[i] - org.x) * rdir.x;
        const ssef lclipMaxY = (node->upper_y[i] - org.y) * rdir.y;
        const ssef lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z;
#endif

        /* slab test: entry distance is the largest per-axis minimum, exit
           distance the smallest per-axis maximum; a lane hits when the
           interval clipped to [tnear, tfar] is not empty */
#if defined(__SSE4_1__)
        const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
        const ssef lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
        const sseb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);
#else
        const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
        const ssef lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
        const sseb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);
#endif

        /* if we hit the child we choose to continue with that child if it
           is closer than the current next child, or we push it onto the
           stack */
        if (likely(any(lhit))) {
          assert(sptr_node < stackEnd);
          assert(child != BVH4::emptyNode);
          /* lanes that miss the child get distance +inf */
          const ssef childDist = select(lhit,lnearP,inf);
          /* reserve one slot; it is filled through (sptr-1) right below */
          sptr_node++;
          sptr_near++;

          /* push cur node onto stack and continue with hit child */
          if (any(childDist < curDist)) {
            *(sptr_node-1) = curNode;
            *(sptr_near-1) = curDist;
            curDist = childDist;
            curNode = child;
          }

          /* push hit child onto stack */
          else {
            *(sptr_node-1) = child;
            *(sptr_near-1) = childDist;
          }
        }
      }
    }

    /* return if stack is empty */
    if (unlikely(curNode == BVH4::invalidNode)) {
      assert(sptr_node == stack_node);
      break;
    }

    /* intersect leaf */
    const sseb valid_leaf = ray_tfar > curDist; /* only feeds the statistics macro */
    STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),4);
    size_t items;
    const Primitive* prim = (Primitive*) curNode.leaf(items);
    /* only lanes that are not terminated yet are handed to the primitive test */
    terminated |= PrimitiveIntersector4::occluded(!terminated,ray,prim,items,bvh->geometry);
    if (all(terminated)) break;
    ray_tfar = select(terminated,ssef(neg_inf),ray_tfar);
  }
  /* report the result for the lanes that were requested and got terminated */
  store4i(valid & terminated,&ray.geomID,0);
  AVX_ZERO_UPPER();
}
bool BVH2Traverser::occluded(const Ray& ray) const { /*! stack state */ int stackPtr = 0; //!< current stack pointer int stack[1+BVH2<Triangle4>::maxDepth]; //!< stack of nodes that still need to get traversed int cur = bvh->root; //!< in cur we track the ID of the current node /*! precomputed shuffles, to switch lower and upper bounds depending on ray direction */ const ssei identity = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); const ssei swap = _mm_set_epi8( 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); const ssei shuffleX = ray.dir.x >= 0 ? identity : swap; const ssei shuffleY = ray.dir.y >= 0 ? identity : swap; const ssei shuffleZ = ray.dir.z >= 0 ? identity : swap; /*! load the ray into SIMD registers */ const ssei pn = ssei(0x00000000,0x00000000,0x80000000,0x80000000); const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z); const sse3f rdir = sse3f(ssef(ray.rdir.x) ^ pn, ssef(ray.rdir.y) ^ pn, ssef(ray.rdir.z) ^ pn); ssef nearFar(ray.near, ray.near, -ray.far, -ray.far); BVH2<Triangle4>::Node* nodes = bvh->nodes; while (true) { /*! this is an inner node */ while (__builtin_expect(cur >= 0, true)) { /*! Single ray intersection with box of both children. See bvh2.h for node layout. */ const BVH2<Triangle4>::Node& node = bvh->node(nodes,cur); const ssef tNearFarX = (shuffle8(node.lower_upper_x,shuffleX) + norg.x) * rdir.x; const ssef tNearFarY = (shuffle8(node.lower_upper_y,shuffleY) + norg.y) * rdir.y; const ssef tNearFarZ = (shuffle8(node.lower_upper_z,shuffleZ) + norg.z) * rdir.z; const ssef tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,nearFar) ^ pn; const sseb lrhit = tNearFar <= shuffle8(tNearFar,swap); /*! if two children hit, push far node onto stack and continue with closer node */ if (__builtin_expect(lrhit[0] != 0 && lrhit[1] != 0, true)) { if (tNearFar[0] < tNearFar[1]) { stack[stackPtr++] = node.child[1]; cur = node.child[0]; } else { stack[stackPtr++] = node.child[0]; cur = node.child[1]; } } /*! 
if one child hit, continue with that child */ else { if (lrhit[0] != 0) cur = node.child[0]; else if (lrhit[1] != 0) cur = node.child[1]; else goto pop_node; } } /*! leaf node, intersect all triangles */ { cur ^= 0x80000000; const size_t ofs = size_t(cur) >> 5; const size_t num = size_t(cur) & 0x1F; for (size_t i=ofs; i<ofs+num; i++) if (bvh->triangles[i].occluded(ray)) return true; } /*! pop next node from stack */ pop_node: if (__builtin_expect(stackPtr == 0, false)) break; cur = stack[--stackPtr]; } return false; }
/*! Closest hit query of a single ray against the BVH2.
 *
 *  \param ray ray to trace; dir, rdir, org, near and far are read
 *  \param hit in/out hit record: hit.t is first clamped to ray.far, then
 *             handed to TriangleIntersector::intersect for every visited
 *             triangle and re-read afterwards to shrink the far bound
 */
void BVH2Intersector<TriangleIntersector>::intersect(const Ray& ray, Hit& hit) const
{
  AVX_ZERO_UPPER();
  STAT3(normal.travs,1,1,1);

  struct StackItem {
    Base* ptr;   //!< node pointer
    float dist;  //!< distance of node
  };

  /*! stack state */
  StackItem stack[1+BVH2::maxDepth];  //!< stack of nodes that still need to get traversed
  StackItem* stackPtr = stack;        //!< current stack pointer
  Base* cur = bvh->root;              //!< node that is currently processed

  /*! precomputed shuffles, to switch lower and upper bounds depending on ray direction */
  const ssei identity = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
  const ssei swap     = _mm_set_epi8( 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
  const ssei shuffleX = ray.dir.x >= 0 ? identity : swap;
  const ssei shuffleY = ray.dir.y >= 0 ? identity : swap;
  const ssei shuffleZ = ray.dir.z >= 0 ? identity : swap;

  /*! load the ray into SIMD registers; 'pn' flips the sign of two lanes so
      that one max() covers the near values and the negated far values */
  const ssei pn = ssei(0x00000000,0x00000000,0x80000000,0x80000000);
  const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const sse3f rdir = sse3f(ssef(ray.rdir.x) ^ pn, ssef(ray.rdir.y) ^ pn, ssef(ray.rdir.z) ^ pn);
  ssef nearFar(ray.near, ray.near, -ray.far, -ray.far);
  hit.t = min(hit.t,ray.far);

  while (true)
  {
    /*! downtraversal loop */
    while (likely(cur->isNode()))
    {
      /*! single ray intersection with box of both children. */
      const Node* node = cur->node();
      const ssef tNearFarX = (shuffle8(node->lower_upper_x,shuffleX) + norg.x) * rdir.x;
      const ssef tNearFarY = (shuffle8(node->lower_upper_y,shuffleY) + norg.y) * rdir.y;
      const ssef tNearFarZ = (shuffle8(node->lower_upper_z,shuffleZ) + norg.z) * rdir.z;
      const ssef tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,nearFar) ^ pn;
      const sseb lrhit = tNearFar <= shuffle8(tNearFar,swap);

      /*! if two children hit, push far node onto stack and continue with closer node */
      if (likely(lrhit[0] != 0 && lrhit[1] != 0)) {
        if (likely(tNearFar[0] < tNearFar[1])) {
          stackPtr->ptr = node->child[1];
          stackPtr->dist = tNearFar[1];
          cur = node->child[0];
          stackPtr++;
        }
        else {
          stackPtr->ptr = node->child[0];
          stackPtr->dist = tNearFar[0];
          cur = node->child[1];
          stackPtr++;
        }
      }

      /*! if one child hit, continue with that child */
      else {
        if      (likely(lrhit[0] != 0)) cur = node->child[0];
        else if (likely(lrhit[1] != 0)) cur = node->child[1];
        else goto pop_node;
      }
    }

    /*! leaf node, intersect all triangles */
    {
      /*! leaf visits of the closest hit query are counted in the 'normal'
          group, the same group normal.travs above belongs to, so they no
          longer inflate the shadow ray statistics */
      STAT3(normal.trav_leaves,1,1,1);
      size_t num;
      Triangle* tri = (Triangle*) cur->leaf(num);
      for (size_t i=0; i<num; i++)
        TriangleIntersector::intersect(ray,hit,tri[i],bvh->vertices);

      /*! keep the near lanes and replace the far lanes with the negated
          current hit distance, which shortens the ray for later box tests */
      nearFar = shuffle<0,1,2,3>(nearFar,-hit.t);
    }

    /*! pop next node from stack, skipping entries that lie behind the
        closest hit found so far */
  pop_node:
    if (unlikely(stackPtr == stack)) break;
    --stackPtr;
    cur = stackPtr->ptr;
    if (unlikely(stackPtr->dist > hit.t)) goto pop_node;
  }
  AVX_ZERO_UPPER();
}
/*! Any-hit query of a single ray against the BVH2.
 *
 *  \param ray ray to test; dir, rdir, org, near and far are read
 *  \return true as soon as TriangleIntersector::occluded reports a blocker,
 *          false once no node is left to visit
 */
bool BVH2Intersector<TriangleIntersector>::occluded(const Ray& ray) const
{
  AVX_ZERO_UPPER();

  /*! traversal stack: nodes that still have to be visited */
  Base* pending[1+BVH2::maxDepth];
  Base** top = pending;

  /*! node that is currently processed */
  Base* cur = bvh->root;

  /*! byte shuffles that either keep a register as it is or exchange its two
      64 bit halves; used to switch lower and upper bounds depending on the
      sign of the ray direction */
  const ssei keepHalves = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
  const ssei swapHalves = _mm_set_epi8( 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
  const ssei orderX = ray.dir.x >= 0 ? keepHalves : swapHalves;
  const ssei orderY = ray.dir.y >= 0 ? keepHalves : swapHalves;
  const ssei orderZ = ray.dir.z >= 0 ? keepHalves : swapHalves;

  /*! ray in SIMD form. The sign mask negates two of the four lanes, so that
      a single max() handles the near values and the negated far values */
  const ssei signMask = ssei(0x00000000,0x00000000,0x80000000,0x80000000);
  const sse3f negOrg(-ray.org.x,-ray.org.y,-ray.org.z);
  const sse3f signedRdir = sse3f(ssef(ray.rdir.x) ^ signMask,
                                 ssef(ray.rdir.y) ^ signMask,
                                 ssef(ray.rdir.z) ^ signMask);
  ssef rayNearFar(ray.near, ray.near, -ray.far, -ray.far);

  while (true)
  {
    /*! descend while the current node is an inner node */
    while (likely(cur->isNode()))
    {
      /*! intersect the ray with the boxes of both children at once */
      const Node* inner = cur->node();
      const ssef slabX = (shuffle8(inner->lower_upper_x,orderX) + negOrg.x) * signedRdir.x;
      const ssef slabY = (shuffle8(inner->lower_upper_y,orderY) + negOrg.y) * signedRdir.y;
      const ssef slabZ = (shuffle8(inner->lower_upper_z,orderZ) + negOrg.z) * signedRdir.z;
      const ssef interval = max(slabX,slabY,slabZ,rayNearFar) ^ signMask;
      const sseb childHit = interval <= shuffle8(interval,swapHalves);
      const bool hitFirst = childHit[0] != 0;
      const bool hitSecond = childHit[1] != 0;

      /*! both children hit: postpone child 0 and continue with child 1;
          no distance ordering is applied here */
      if (likely(hitFirst && hitSecond)) {
        *top = inner->child[0];
        ++top;
        cur = inner->child[1];
      }
      /*! at most one child hit: follow it, or give up on this subtree */
      else if (hitFirst) {
        cur = inner->child[0];
      }
      else if (hitSecond) {
        cur = inner->child[1];
      }
      else {
        goto pop_node;
      }
    }

    /*! leaf: the first triangle that occludes the ray ends the query */
    {
      STAT3(shadow.trav_leaves,1,1,1);
      size_t count;
      Triangle* triangles = (Triangle*) cur->leaf(count);
      for (size_t k = 0; k < count; ++k) {
        if (TriangleIntersector::occluded(ray,triangles[k],bvh->vertices)) {
          AVX_ZERO_UPPER();
          return true;
        }
      }
    }

    /*! continue with the most recently postponed node, if any */
  pop_node:
    if (unlikely(top == pending)) break;
    --top;
    cur = *top;
  }

  AVX_ZERO_UPPER();
  return false;
}