void bumps_t::initialize ( const bump_specifier_t & b0, const bump_specifier_t & b1) { // Precompute the coefficients of four cubic polynomials in t, giving // the two smoothstep regions of the each of the two bump functions. v4f b0t = load4f (& b0.t0); // b0.t0 b0.t1 b0.t2 b0.t2 v4f b1t = load4f (& b1.t0); // b1.t0 b1.t1 b1.t2 b1.t2 v4f b0v = _mm_movelh_ps (load4f (& b0.v0), _mm_setzero_ps ()); // b0.v0 b0.v1 v4f b1v = _mm_movelh_ps (load4f (& b1.v0), _mm_setzero_ps ()); // b1.v0 b1.v1 v4f S = SHUFPS (b0t, b1t, (0, 2, 0, 2)); // b0.t0 b0.t2 b1.t0 b1.t2 v4f T = SHUFPS (b0t, b1t, (1, 3, 1, 3)); // b0.t1 b0.t3 b1.t1 b1.t3 v4f U = SHUFPS (b0v, b1v, (0, 2, 0, 2)); // b0.v0 0 b1.v0 0 v4f V1 = SHUFPS (b0v, b1v, (1, 0, 1, 0)); // b0.v1 b0.v0 b1.v1 b1.v0 v4f V2 = SHUFPS (b0v, b1v, (2, 1, 2, 1)); // 0 b0.v1 0 b1.v1 v4f V = V1 - V2; v4f d = T - S; v4f a = T + S; v4f m = (V - U) / (d * d * d); store4f (c [0], U + m * S * S * (a + d + d)); store4f (c [1], _mm_set1_ps (-6.0f) * m * S * T); store4f (c [2], _mm_set1_ps (+3.0f) * m * a); store4f (c [3], _mm_set1_ps (-2.0f) * m); store4f (S0, S); store4f (T0, T); store4f (U0, U); store4f (V0, V); }
v4f step_t::operator () (float t) const
{
  // Evaluate the cubic f(t) = (c0 + c1 t) + (c2 + c3 t) t^2 by Estrin's
  // method and select the result by range. Every lane of the return value is
  //   0     if t < t0,
  //   f(t)  if t0 <= t < t1,
  //   1     if t1 <= t < +infinity.
  v4f coeffs = load4f (c);                          // c0 c1 c2 c3
  v4f ones = _mm_set1_ps (1.0f);                    // 1 1 1 1
  v4f tttt = _mm_set1_ps (t);                       // t t t t
  v4f one_t = _mm_unpacklo_ps (ones, tttt);         // 1 t 1 t
  v4f terms = coeffs * one_t;                       // c0 c1*t c2 c3*t
  v4f halves = _mm_hadd_ps (terms, terms) * one_t * one_t;
                                                    // c0+c1*t (c2+c3*t)*t^2 (twice)
  v4f f = _mm_hadd_ps (halves, halves);             // f f f f

  // Pair the candidate results (f, 1) with the half-open intervals
  // [t0, t1) and [t1, inf); at most one interval contains t.
  v4f candidates = _mm_unpacklo_ps (f, ones);       // f 1 f 1
  v4f bounds = load4f (T);                          // t0 t1 t1 inf
  v4f lower = _mm_movelh_ps (bounds, bounds);       // t0 t1 t0 t1
  v4f upper = _mm_movehl_ps (bounds, bounds);       // t1 inf t1 inf
  v4f at_least = _mm_cmpge_ps (tttt, lower);
  v4f below = _mm_cmplt_ps (tttt, upper);
  v4f in_range = _mm_and_ps (at_least, below);
  v4f picked = _mm_and_ps (in_range, candidates);   // f? 1? f? 1?

  // Adding each pair leaves the selected candidate, or 0 if none matched.
  return _mm_hadd_ps (picked, picked);
}
// Returns { f, g, f, g }, where f = bump0 (t), g = bump1 (t). v4f bumps_t::operator () (float t) const { // Compute all four polynomials by Estrin's method, and mask and combine the // values according to the region of the graph to which t belongs. v4f s = _mm_set1_ps (t); v4f S = load4f (S0); v4f T = load4f (T0); v4f U = load4f (U0); v4f V = load4f (V0); v4f f01 = load4f (c [0]) + load4f (c [1]) * s; v4f f12 = load4f (c [2]) + load4f (c [3]) * s; v4f f = f01 + f12 * s * s; v4f ltS = _mm_cmplt_ps (s, S); v4f geT = _mm_cmpge_ps (s, T); v4f x1 = _mm_andnot_ps (_mm_or_ps (ltS, geT), f); v4f x2 = _mm_and_ps (ltS, U); v4f x3 = _mm_and_ps (geT, V); v4f val = _mm_or_ps (_mm_or_ps (x1, x2), x3); return _mm_hadd_ps (val, val); }
void step_t::initialize (float t0, float t1) { // Precompute the coefficents c of the cubic polynomial f // such that f(t0)=0, f(t1)=1, f'(t0)=0 and f'(t0)=1. float d = t1 - t0; float a = t1 + t0; c [0] = t0 * t0 * (a + d + d); c [1] = -6 * t0 * t1; c [2] = 3 * a; c [3] = -2; // Divide c [] by d^3. v4f dt = _mm_set1_ps (d); store4f (c, load4f (c) / (dt * dt * dt)); T [0] = t0; T [1] = t1; T [2] = t1; T [3] = std::numeric_limits <float>::infinity (); }
void BVH4Intersector1<PrimitiveIntersector>::intersect(const BVH4* bvh, Ray& ray) { /*! stack state */ StackItemInt32<NodeRef> stack[stackSize]; //!< stack of nodes StackItemInt32<NodeRef>* stackPtr = stack+1; //!< current stack pointer StackItemInt32<NodeRef>* stackEnd = stack+stackSize; stack[0].ptr = bvh->root; stack[0].dist = neg_inf; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray.dir.x >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray.dir.y >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray.dir.z >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef); #if 0 // FIXME: why is this slower /*! load the ray */ Vec3fa ray_org = ray.org; Vec3fa ray_dir = ray.dir; ssef ray_near = max(ray.tnear,FLT_MIN); // we do not support negative tnear values in this kernel due to integer optimizations ssef ray_far = ray.tfar; #if defined(__FIX_RAYS__) const float float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,Vec3fa(-float_range),Vec3fa(+float_range)); ray_dir = clamp(ray_dir,Vec3fa(-float_range),Vec3fa(+float_range)); ray_far = min(ray_far,float(inf)); #endif const Vec3fa ray_rdir = rcp_safe(ray_dir); const sse3f org(ray_org), dir(ray_dir); const sse3f norg(-ray_org), rdir(ray_rdir), org_rdir(ray_org*ray_rdir); #else /*! load the ray into SIMD registers */ const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const ssef ray_near(ray.tnear); ssef ray_far(ray.tfar); #endif /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(*(float*)&stackPtr->dist > ray.tfar)) continue; /* downtraversal loop */ while (true) { /*! 
stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,ray_near); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); c = node->child(r); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! this is a leaf node */ STAT3(normal.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); PrimitiveIntersector::intersect(ray,prim,num,bvh->geometry); ray_far = ray.tfar; } }
void BVH4Intersector1<PrimitiveIntersector>::occluded(const BVH4* bvh, Ray& ray) { /*! stack state */ NodeRef stack[stackSize]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSize; stack[0] = bvh->root; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray.dir.x >= 0 ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray.dir.y >= 0 ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray.dir.z >= 0 ? 4*sizeof(ssef) : 5*sizeof(ssef); #if 0 // FIXME: why is this slower /*! load the ray */ Vec3fa ray_org = ray.org; Vec3fa ray_dir = ray.dir; ssef ray_near = max(ray.tnear,FLT_MIN); // we do not support negative tnear values in this kernel due to integer optimizations ssef ray_far = ray.tfar; #if defined(__FIX_RAYS__) const float float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,Vec3fa(-float_range),Vec3fa(+float_range)); ray_dir = clamp(ray_dir,Vec3fa(-float_range),Vec3fa(+float_range)); ray_far = min(ray_far,float(inf)); #endif const Vec3fa ray_rdir = rcp_safe(ray_dir); const sse3f org(ray_org), dir(ray_dir); const sse3f norg(-ray_org), rdir(ray_rdir), org_rdir(ray_org*ray_rdir); #else /*! load the ray into SIMD registers */ const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const ssef ray_near(ray.tnear); ssef ray_far(ray.tfar); #endif /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,ray_near); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); assert(cur != BVH4::emptyNode); if (likely(mask == 0)) continue; assert(stackPtr < stackEnd); *stackPtr = cur; stackPtr++; /*! four children are hit */ cur = node->child(3); assert(cur != BVH4::emptyNode); } /*! this is a leaf node */ STAT3(shadow.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); if (PrimitiveIntersector::occluded(ray,prim,num,bvh->geometry)) { ray.geomID = 0; break; } } AVX_ZERO_UPPER(); }
__forceinline bool BVH4Intersector4Hybrid<PrimitiveIntersector4>::occluded1(const BVH4* bvh, NodeRef root, size_t k, Ray4& ray, const sse3f& ray_org, const sse3f& ray_dir, const sse3f& ray_rdir, const ssef& ray_tnear, const ssef& ray_tfar) { /*! stack state */ NodeRef stack[stackSizeSingle]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSizeSingle; stack[0] = root; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_dir.x[k] >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray_dir.y[k] >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray_dir.z[k] >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef); /*! load the ray into SIMD registers */ const sse3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const sse3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const sse3f norg = -org, org_rdir(org*rdir); const ssef rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const float d0 = tNear[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const float d1 = tNear[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); assert(cur != BVH4::emptyNode); if (likely(mask == 0)) continue; assert(stackPtr < stackEnd); *stackPtr = cur; stackPtr++; /*! four children are hit */ cur = node->child(3); assert(cur != BVH4::emptyNode); } /*! this is a leaf node */ STAT3(shadow.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); if (PrimitiveIntersector4::occluded(ray,k,prim,num,bvh->geometry)) { ray.geomID[k] = 0; return true; } } return false; }
__forceinline void BVH4Intersector4Hybrid<PrimitiveIntersector4>::intersect1(const BVH4* bvh, NodeRef root, size_t k, Ray4& ray, const sse3f& ray_org, const sse3f& ray_dir, const sse3f& ray_rdir, const ssef& ray_tnear, const ssef& ray_tfar) { /*! stack state */ StackItem stack[stackSizeSingle]; //!< stack of nodes StackItem* stackPtr = stack+1; //!< current stack pointer StackItem* stackEnd = stack+stackSizeSingle; stack[0].ptr = root; stack[0].dist = neg_inf; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_dir.x[k] >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray_dir.y[k] >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray_dir.z[k] >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef); /*! load the ray into SIMD registers */ const sse3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const sse3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const sse3f norg = -org, org_rdir(org*rdir); ssef rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(stackPtr->dist > ray.tfar[k])) continue; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const float d0 = tNear[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const float d1 = tNear[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); float d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); c = node->child(r); d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! this is a leaf node */ STAT3(normal.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); PrimitiveIntersector4::intersect(ray,k,prim,num,bvh->geometry); rayFar = ray.tfar[k]; } }
void make_system (unsigned q, unsigned r, const float (& xyz_in) [3] [4], float (* nodes) [4], std::uint8_t (* indices) [6]) { std::uint8_t * P, * Q, * R; // Permutations taking triangles around nodes. std::uint8_t * Qi; // Inverse of the permutation Q. std::uint8_t * Px, * Rx; // Map a triangle to its P- or R-node. std::uint8_t memory [360]; std::uint8_t * memp = memory; P = memp; memp += 60; Q = memp; memp += 60; R = memp; memp += 60; Qi = memp; memp += 60; Px = memp; memp += 60; Rx = memp; // memp += 60; const std::uint8_t undef = 0xff; const unsigned p = 2, N = 2 * p * q * r / (q * r + r * p + p * q - p * q * r); for (unsigned n = 0; n != sizeof memory; ++ n) memory [n] = undef; for (unsigned n = 0; n != N; ++ n) { n [Q] = n - n % q + (n + 1) % q; n [Q] [Qi] = n; } unsigned next_node = N / q; // We are given the coordinates of the P-, Q- and R-nodes in triangle 0. store4f (nodes [Px [0] = next_node ++], load4f (xyz_in [0])); store4f (nodes [0], load4f (xyz_in [1])); store4f (nodes [Rx [0] = next_node ++], load4f (xyz_in [2])); float two_pi = 0x1.921fb6P+002f; float A = two_pi / ui2f (p); float B = two_pi / ui2f (q); unsigned n0 = 0, m0 = 0; while ([& m0, Px, Rx, q, r, nodes, & next_node, N, B] () -> bool { // Calculate coordinates of any unknown P- and R-nodes around Q-node m0. ALIGNED16 rotor_t Y_rotate (nodes [m0 / q], B); for (unsigned n = m0 + 1; n != m0 + q; ++ n) { if (Px [n] == undef) { Px [n] = next_node; Y_rotate (nodes [Px [n - 1]], nodes [next_node]); ++ next_node; } if (Rx [n] == undef) { Rx [n] = next_node; Y_rotate (nodes [Rx [n - 1]], nodes [next_node]); ++ next_node; } } m0 += q; return m0 != N; } ()) { while (n0 [P] != undef) ++ n0; // Attach triangle m0 to triangle n0's dangling P-node. // At this point we learn the coordinates of the next Q-node. Px [m0] = Px [n0]; ALIGNED16 rotor_t X_rotate (nodes [Px [n0]], A); X_rotate (nodes [n0 / q], nodes [m0 / q]); // Work out the consequences of attaching the new triangle. 
// Invariant: n [P] = m if and only if m [Q] [R] = n, for all m, n < N. unsigned n = n0, m = m0; do { n [P] = m; m = m [Q]; m [R] = n; Rx [m] = Rx [n] = Rx [m] & Rx [n]; unsigned d = 1; while (d != r && n [R] != undef) { n = n [R]; ++ d; } while (d != r && m [P] != undef) { m = m [P] [Q]; ++ d; } if (d == r - 1) { n [R] = m; n = n [Qi]; m [P] = n; Px [m] = Px [n] = Px [m] & Px [n]; } if (n [P] != undef) { n = m0; m = n0; } } while (n [P] == undef); } for (unsigned n = 0; n != N; ++ n) { unsigned i = n; unsigned j = i [R]; unsigned k = j [P]; indices [n] [0] = Px [j]; indices [n] [1] = j / q; indices [n] [2] = Rx [i]; indices [n] [3] = Px [i]; indices [n] [4] = k / q; indices [n] [5] = Rx [k]; } }
void BVH4Intersector1Bezier<PrimitiveIntersector>::intersect(const BVH4* bvh, Ray& ray)
{
  /*! per-ray precalculations required by the primitive intersector */
  Precalculations pre(ray);

  /*! Traversal stack. Each entry holds a node reference together with the
   *  raw 32-bit pattern of the distance at which the ray enters that node. */
  StackItemInt32<NodeRef> stack[stackSize];
  StackItemInt32<NodeRef>* top = stack+1;                 //!< first free stack slot
  StackItemInt32<NodeRef>* stackLimit = stack+stackSize;  //!< end of the stack storage (checked by the asserts)
  stack[0].ptr  = bvh->root;
  stack[0].dist = neg_inf;

  /*! byte offsets into a node that select, per axis, the bounds used for the
   *  entry distance; the choice depends on the sign of the ray direction */
  const size_t nearX = ray.dir.x >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef);
  const size_t nearY = ray.dir.y >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef);
  const size_t nearZ = ray.dir.z >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef);

  /*! offsets of the opposite bounds; XOR with 16 flips between the two
   *  vectors of an axis */
  const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16;

  /*! broadcast the ray into SIMD registers */
  const Vec3fa ray_rdir = rcp_safe(ray.dir);
  const Vec3fa ray_org_rdir = ray.org*ray_rdir;
  const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z);
  const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
  const ssef ray_near(ray.tnear);
  ssef ray_far(ray.tfar);

  /* pop loop; "goto popNext" restarts it from inside the descent loop */
  for (;;) popNext:
  {
    /*! traversal ends when the stack is empty */
    if (unlikely(top == stack)) break;
    top--;
    NodeRef cur = NodeRef(top->ptr);

    /*! skip entries whose entry distance lies beyond the current ray.tfar */
    if (unlikely(*(float*)&top->dist > ray.tfar)) continue;

    /* descend until a leaf is reached */
    for (;;)
    {
      if (unlikely(cur.isLeaf())) break;
      STAT3(normal.trav_nodes,1,1,1);

      /*! intersect the ray with the four child boxes of this node */
      const Node* node = cur.node();
      const char* base = (const char*)node;

      /*! smallest absolute distance to the far bounds of each child */
      const ssef tFarX0 = abs((norg.x + load4f(base+farX)) * rdir.x);
      const ssef tFarY0 = abs((norg.y + load4f(base+farY)) * rdir.y);
      const ssef tFarZ0 = abs((norg.z + load4f(base+farZ)) * rdir.z);
      const ssef tFar0  = min(tFarX0,tFarY0,tFarZ0);

      /*! amount by which each box is enlarged: |org.w + tFar0 * dir.w|
       *  (presumably the ray's radius at that distance -- confirm against
       *  the Ray layout) */
      const ssef radius = abs(ssef(ray.org.w) + tFar0 * ssef(ray.dir.w));
      //const ssef radius = zero;
      //PRINT2(tFar0,radius);

      /*! slab distances against the enlarged bounds */
      const ssef tLowerX = (norg.x + node->lower_x - radius) * rdir.x;
      const ssef tLowerY = (norg.y + node->lower_y - radius) * rdir.y;
      const ssef tLowerZ = (norg.z + node->lower_z - radius) * rdir.z;
      const ssef tUpperX = (norg.x + node->upper_x + radius) * rdir.x;
      const ssef tUpperY = (norg.y + node->upper_y + radius) * rdir.y;
      const ssef tUpperZ = (norg.z + node->upper_z + radius) * rdir.z;

      const ssef tNearX = min(tLowerX,tUpperX);
      const ssef tNearY = min(tLowerY,tUpperY);
      const ssef tNearZ = min(tLowerZ,tUpperZ);
      const ssef tFarX  = max(tLowerX,tUpperX);
      const ssef tFarY  = max(tLowerY,tUpperY);
      const ssef tFarZ  = max(tLowerZ,tUpperZ);

      /*! bit i of hits is set when child i is hit */
      const ssef tNear = max(tNearX,tNearY,tNearZ,ray_near);
      const ssef tFar  = min(tFarX ,tFarY ,tFarZ ,ray_far);
      const sseb hitLanes = tNear <= tFar;
      size_t hits = movemask(hitLanes);

      /*! no child hit: pop the next stack entry */
      if (unlikely(hits == 0)) goto popNext;

      /*! exactly one child hit: descend into it */
      size_t slot = __bscf(hits);
      if (likely(hits == 0)) {
        cur = node->child(slot);
        //assert(cur != BVH4::emptyNode); // FIXME: enable these assertions again, currently traversing empty children
        continue;
      }

      /*! at least two children hit: fetch the first two together with the
       *  bit patterns of their entry distances (compared as integers) */
      NodeRef childA = node->child(slot);
      const unsigned int distA = ((unsigned int*)&tNear)[slot];
      slot = __bscf(hits);
      NodeRef childB = node->child(slot);
      const unsigned int distB = ((unsigned int*)&tNear)[slot];
      //assert(childA != BVH4::emptyNode);
      //assert(childB != BVH4::emptyNode);

      /*! exactly two: push the farther child, descend into the closer one */
      if (likely(hits == 0)) {
        assert(top < stackLimit);
        if (distA < distB) { top->ptr = childB; top->dist = distB; top++; cur = childA; }
        else               { top->ptr = childA; top->dist = distA; top++; cur = childB; }
        continue;
      }

      /*! Slow path for 3 or 4 hit children: push all of them onto the
       *  stack and sort them there. */
      assert(top < stackLimit);
      top->ptr = childA; top->dist = distA; top++;
      assert(top < stackLimit);
      top->ptr = childB; top->dist = distB; top++;

      /*! third child: push it; if it is the last one, sort the three
       *  entries and continue with the one that ends up on top */
      assert(top < stackLimit);
      slot = __bscf(hits);
      NodeRef childC = node->child(slot);
      const unsigned int distC = ((unsigned int*)&tNear)[slot];
      top->ptr = childC; top->dist = distC; top++;
      //assert(childC != BVH4::emptyNode);
      if (likely(hits == 0)) {
        sort(top[-1],top[-2],top[-3]);
        top--;
        cur = (NodeRef) top->ptr;
        continue;
      }

      /*! fourth child: push it, sort all four entries and continue with
       *  the one that ends up on top */
      assert(top < stackLimit);
      slot = __bscf(hits);
      NodeRef childD = node->child(slot);
      const unsigned int distD = ((unsigned int*)&tNear)[slot];
      top->ptr = childD; top->dist = distD; top++;
      //assert(childD != BVH4::emptyNode);
      sort(top[-1],top[-2],top[-3],top[-4]);
      top--;
      cur = (NodeRef) top->ptr;
    }

    /*! cur is a leaf: intersect its primitives, then reload the far bound
     *  from ray.tfar */
    STAT3(normal.trav_leaves,1,1,1);
    size_t num;
    Primitive* prim = (Primitive*) cur.leaf(num);
    PrimitiveIntersector::intersect(pre,ray,prim,num,bvh->geometry);
    ray_far = ray.tfar;
  }
}