int main (int argc, char *argv[]) { MINT *a, *b, *c, *d; short h; mp_set_memory_functions (NULL, NULL, NULL); a = itom (123); b = xtom ("DEADBEEF"); c = itom (0); d = itom (0); move (a, b); madd (a, b, c); msub (a, b, c); mult (a, b, c); mdiv (b, a, c, d); sdiv (b, 2, c, &h); msqrt (a, c, d); pow (b, a, a, c); rpow (a, 3, c); gcd (a, b, c); mcmp (a, b); if (argc > 1) { min (c); mout (a); } mtox (b); mfree(a); exit (0); }
void qsub(void) { unsigned int *a, *ab, *b, *ba, *c; save(); p2 = pop(); p1 = pop(); ab = mmul(p1->u.q.a, p2->u.q.b); ba = mmul(p1->u.q.b, p2->u.q.a); a = msub(ab, ba); mfree(ab); mfree(ba); // zero? if (MZERO(a)) { mfree(a); push(zero); restore(); return; } b = mmul(p1->u.q.b, p2->u.q.b); c = mgcd(a, b); MSIGN(c) = MSIGN(b); p1 = alloc(); p1->k = NUM; p1->u.q.a = mdiv(a, c); p1->u.q.b = mdiv(b, c); mfree(a); mfree(b); mfree(c); push(p1); restore(); }
/*
 * minvert(a, b, c): run a Euclid-style remainder loop on (b, a) while
 * tracking a multiplier recurrence, and store the outcome in c.
 *
 * NOTE(review): from the name and the loop invariant below this looks like
 * "c = inverse of a modulo b" -- confirm against callers.
 *
 * a, b are only read through mcopy/msub here; c receives the result.
 * When the final value left in x compares unequal to 1 (mcmp != 0), c is
 * simply set to 1 rather than signalling an error.
 */
FN minvert(MINT *a, MINT *b, MINT *c)
{
	MINT x, y, z, w, Anew, Aold;
	int i = 0;		/* number of loop iterations performed */
	static MINT one;	/* constant 1, built on first call only */
	static int oneinit = 1;

	if (oneinit) {
		oneinit = 0;
		MSET(1,&one);
	}
	MINIT(&x);
	MINIT(&y);
	MINIT(&z);
	MINIT(&w);
	MINIT(&Aold);
	MSET (1,&Anew);
	mcopy(b, &x);
	mcopy(a, &y);
	/*
	 * Loop invariant:
	 *
	 * y = -1^i * Anew * a  mod b
	 */
	while(mtest(&y) != 0) {
		/* w = quotient, z = remainder of x / y (presumably; mdiv is opaque here) */
		mdiv(&x, &y, &w, &z);
		/* x is no longer needed as dividend: reuse it to hold the old Anew */
		mcopy(&Anew, &x);
		/* Anew = w * Anew + Aold */
		mmult(&w, &Anew, &Anew);
		madd(&Anew, &Aold, &Anew);
		/* shift the recurrence: Aold <- previous Anew, x <- y, y <- remainder */
		mmove(&x, &Aold);
		mmove(&y, &x);
		mmove(&z, &y);
		i++;
	}
	if (mcmp(&one,&x)) {
		/* x differs from 1: result is set to 1 */
		mcopy(&one,c);
	} else {
		mmove(&Aold, c);
		/* for an even iteration count the result is replaced by b - c */
		if( (i&01) == 0) msub(b, c, c);
	}
	MFREE(&x);
	MFREE(&y);
	MFREE(&z);
	MFREE(&w);
	MFREE(&Aold);
	MFREE(&Anew);
}
/*
 * newton_fd - Newton iteration using finite-difference derivatives.
 *
 * funcpt : objective function, called as funcpt(point, N) by the
 *          finite-difference helpers.
 * xi     : starting point (N doubles); overwritten with the latest iterate
 *          after every step.
 * N      : problem dimension; values <= 0 are rejected (nothing is done).
 * x      : output buffer (N doubles) receiving the current iterate.
 *
 * Each iteration builds the gradient (jacobian_fd) and Hessian (hessian_fd),
 * passes the Hessian through modelhess2 to obtain L, negates the gradient,
 * solves with linsolve_lower and adds the step to xi.  Iteration stops when
 * the largest absolute component of the step is <= 1e-05 or after 100
 * iterations.  Progress is printed to stdout after every iteration.
 *
 * On allocation failure a message is written to stderr and the function
 * returns without touching xi or x.
 */
void newton_fd(double (*funcpt)(double *,int),double *xi,int N,double *x) {
	int it, i;
	double *jac = NULL, *hess = NULL, *L = NULL, *xf = NULL;
	double stop;
	size_t n;

	/* A non-positive dimension would index x[0] out of bounds below. */
	if (N <= 0) {
		return;
	}
	n = (size_t) N;

	/* Guard the n*n*sizeof(double) products against size_t wrap-around. */
	if (n > ((size_t) -1) / sizeof(double) / n) {
		fprintf(stderr, "newton_fd: problem size too large\n");
		return;
	}

	jac = malloc(sizeof *jac * n);
	xf = malloc(sizeof *xf * n);
	hess = malloc(sizeof *hess * n * n);
	L = malloc(sizeof *L * n * n);
	if (jac == NULL || xf == NULL || hess == NULL || L == NULL) {
		fprintf(stderr, "newton_fd: out of memory\n");
		goto cleanup;
	}

	it = 0;
	stop = 1.0;

	while (stop > 1e-05 && it < 100) {
		jacobian_fd(funcpt,xi,N,jac);
		hessian_fd(funcpt,xi,N,hess);

		modelhess2(hess,N,L);

		/* Solve for the step using the negated gradient. */
		scale(jac,1,N,-1.0);
		linsolve_lower(L,N,jac,x);

		/* x = step + xi;  xf = x - xi (the step actually taken). */
		madd(x,xi,x,1,N);
		msub(x,xi,xf,1,N);
		stop = array_max_abs(xf,N);

		for (i = 0; i < N; ++i) {
			xi[i] = x[i];
		}
		it++;

		/* The original message prints two coordinates; only do so when
		 * a second coordinate actually exists. */
		if (N > 1) {
			printf("Values obtained %lf , %lf after %d Iterations \n",x[0],x[1],it);
		} else {
			printf("Values obtained %lf after %d Iterations \n",x[0],it);
		}
	}

cleanup:
	free(jac);
	free(hess);
	free(xf);
	free(L);
}
/*
 * msqrt(a, b, r): integer square root with remainder.
 *
 * On return b holds the root found by the iteration below and
 * r = a - b*b.  The function returns r->len (0 when a is zero).
 * A negative a (a->len < 0) is reported through mpfatal().
 *
 * The return type and the loop counters were previously implicit int,
 * which is not valid C since C99; they are now declared explicitly.
 * The type is unchanged for callers.
 */
int
msqrt(MINT *a, MINT *b, MINT *r)
{
	MINT x, y, z;
	register int alen, j;

	MINIT(&x);
	MINIT(&y);
	MINIT(&z);
	alen = a->len;
	if (alen < 0)
		mpfatal("msqrt: neg arg");
	if (alen == 0) {
		mset(0, b);
		mset(0, r);
		return (0);
	}

	/*
	 * Build the starting estimate x: roughly half as many digits as a,
	 * all zero except the top digit (0400 for odd length, 1 for even).
	 */
	if (alen & 01)
		x.len = (1 + alen) / 2;
	else
		x.len = 1 + alen / 2;
	valloc(x.val, x.len);
	for (j = x.len; (--j) >= 0;)
		x.val[j] = 0;
	if (alen & 01)
		x.val[x.len - 1] = 0400;
	else
		x.val[x.len - 1] = 1;

	/* Iterate y = (x + a/x) / 2 until the estimate stops decreasing. */
	for (;;) {
		mdiv(a, &x, &y, &z);
		madd(&x, &y, &y);
		mshiftr(&y, 1);
		if (mcmp(&x, &y) <= 0)
			break;
		mmove(&y, &x);
	}

	/* Keep the root in y, square x in place, and form the remainder. */
	mcopy(&x, &y);
	mmult(&x, &x, &x);
	msub(a, &x, r);
	MFREE(&x);
	MMOVEFREE(&y, b);
	MFREE(&z);
	return (r->len);
}
/*! Packet ("chunk") traversal of a BVH4 for a Ray4: all lanes walk the
 *  same node sequence and PrimitiveIntersector4::intersect is invoked on
 *  every leaf that is reached.
 *
 *  \param valid_i  mask of the lanes to process (read once at entry)
 *  \param bvh      hierarchy to traverse; bvh->root and bvh->geometry are used
 *  \param ray      ray packet; ray.tfar is re-read after each leaf so that
 *                  hits found by the primitive intersector shorten the
 *                  traversal interval
 *
 *  The node stack and the per-lane distance stack are kept in parallel
 *  arrays; slot 0 holds the BVH4::invalidNode sentinel whose pop ends the
 *  traversal. */
void BVH4Intersector4Chunk<PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray)
{
  /* load ray; lanes outside valid0 get tnear=+inf and tfar=-inf
     (presumably select() takes its third argument where the mask is false) */
  const sseb valid0 = *valid_i;
  const sse3f rdir = rcp_safe(ray.dir);
  const sse3f org(ray.org), org_rdir = org * rdir;
  ssef ray_tnear = select(valid0,ray.tnear,ssef(pos_inf));
  ssef ray_tfar = select(valid0,ray.tfar ,ssef(neg_inf));
  const ssef inf = ssef(pos_inf);
  Precalculations pre(valid0,ray);

  /* allocate stack and push root node (slot 0 = sentinel, slot 1 = root) */
  ssef stack_near[stackSize];
  NodeRef stack_node[stackSize];
  stack_node[0] = BVH4::invalidNode;
  stack_near[0] = inf;
  stack_node[1] = bvh->root;
  stack_near[1] = ray_tnear;
  NodeRef* stackEnd = stack_node+stackSize;
  NodeRef* __restrict__ sptr_node = stack_node + 2;
  ssef* __restrict__ sptr_near = stack_near + 2;

  while (1)
  {
    /* pop next node from stack */
    assert(sptr_node > stack_node);
    sptr_node--;
    sptr_near--;
    NodeRef curNode = *sptr_node;
    if (unlikely(curNode == BVH4::invalidNode)) {
      assert(sptr_node == stack_node);
      break;
    }

    /* cull node if behind closest hit point (no lane has tfar > entry distance) */
    ssef curDist = *sptr_near;
    if (unlikely(none(ray_tfar > curDist)))
      continue;

    while (1)
    {
      /* test if this is a leaf node */
      if (unlikely(curNode.isLeaf()))
        break;

      const sseb valid_node = ray_tfar > curDist;
      STAT3(normal.trav_nodes,1,popcnt(valid_node),4);
      const Node* __restrict__ const node = curNode.node();

      /* pop of next node: the stack top becomes the provisional "next"
         node; each hit child below either replaces it (and the replaced
         entry is pushed back) or is pushed itself */
      assert(sptr_node > stack_node);
      sptr_node--;
      sptr_near--;
      curNode = *sptr_node;
      curDist = *sptr_near;

#pragma unroll(4)
      for (unsigned i=0; i<BVH4::N; i++)
      {
        /* children are scanned in order; the first empty slot ends the scan */
        const NodeRef child = node->children[i];
        if (unlikely(child == BVH4::emptyNode)) break;

        /* per-axis slab distances: (bound - org) * rdir, in fused form on AVX2 */
#if defined(__AVX2__)
        const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
        const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
        const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
        const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
        const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
        const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
#else
        const ssef lclipMinX = (node->lower_x[i] - org.x) * rdir.x;
        const ssef lclipMinY = (node->lower_y[i] - org.y) * rdir.y;
        const ssef lclipMinZ = (node->lower_z[i] - org.z) * rdir.z;
        const ssef lclipMaxX = (node->upper_x[i] - org.x) * rdir.x;
        const ssef lclipMaxY = (node->upper_y[i] - org.y) * rdir.y;
        const ssef lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z;
#endif

        /* entry = max of per-axis minima, exit = min of per-axis maxima;
           a lane hits when the clipped interval is non-empty */
#if defined(__SSE4_1__)
        const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
        const ssef lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
        const sseb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);
#else
        const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
        const ssef lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
        const sseb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);
#endif

        /* if we hit the child we choose to continue with that child if it
           is closer than the current next child, or we push it onto the stack */
        if (likely(any(lhit)))
        {
          assert(sptr_node < stackEnd);
          /* lanes that miss the child get distance +inf */
          const ssef childDist = select(lhit,lnearP,inf);
          const NodeRef child = node->children[i];
          assert(child != BVH4::emptyNode);
          sptr_node++;
          sptr_near++;

          /* push cur node onto stack and continue with hit child */
          if (any(childDist < curDist))
          {
            *(sptr_node-1) = curNode;
            *(sptr_near-1) = curDist;
            curDist = childDist;
            curNode = child;
          }

          /* push hit child onto stack */
          else {
            *(sptr_node-1) = child;
            *(sptr_near-1) = childDist;
          }
        }
      }
    }

    /* return if stack is empty (the sentinel was popped inside the inner loop) */
    if (unlikely(curNode == BVH4::invalidNode)) {
      assert(sptr_node == stack_node);
      break;
    }

    /* intersect leaf */
    const sseb valid_leaf = ray_tfar > curDist;
    STAT3(normal.trav_leaves,1,popcnt(valid_leaf),4);
    size_t items;
    const Primitive* prim = (Primitive*) curNode.leaf(items);
    PrimitiveIntersector4::intersect(valid_leaf,pre,ray,prim,items,bvh->geometry);
    /* refresh the far distance from the ray for the lanes that were tested */
    ray_tfar = select(valid_leaf,ray.tfar,ray_tfar);
  }
  AVX_ZERO_UPPER();
}
/* Sanity checks for the two-argument madd/msub helpers. */
static void test_heart()
{
	/* announce the test on stdout */
	fputs("run test heart\n", stdout);

	assert(madd(5, 6) == 11);
	assert(msub(1024, 512) == 512);
}
/*! Hybrid occlusion (shadow-ray) traversal of a BVH8 for a Ray8 packet.
 *
 *  The packet is traversed in lock-step; whenever the number of lanes still
 *  active at a popped node is at most SWITCH_THRESHOLD, those lanes are
 *  traversed one by one through occluded1() starting at that node.
 *
 *  \param valid_i  mask of the lanes to process (read once at entry)
 *  \param bvh      hierarchy to traverse; bvh->root and bvh->scene are used
 *  \param ray      ray packet; on exit store8i() writes 0 to ray.geomID under
 *                  the mask (valid & terminated) -- presumably a masked store
 *
 *  `terminated` starts as !valid, so inactive lanes count as finished and
 *  traversal ends as soon as every lane is terminated. */
void BVH8Intersector8Hybrid<PrimitiveIntersector8>::occluded(bool8* valid_i, BVH8* bvh, Ray8& ray)
{
  /* load ray; lanes outside valid get tnear=+inf and tfar=-inf */
  const bool8 valid = *valid_i;
  bool8 terminated = !valid;
  Vec3f8 ray_org = ray.org, ray_dir = ray.dir;
  float8 ray_tnear = ray.tnear, ray_tfar = ray.tfar;
  const Vec3f8 rdir = rcp_safe(ray_dir);
  const Vec3f8 org(ray_org), org_rdir = org * rdir;
  ray_tnear = select(valid,ray_tnear,float8(pos_inf));
  ray_tfar = select(valid,ray_tfar ,float8(neg_inf));
  const float8 inf = float8(pos_inf);
  Precalculations pre(valid,ray);

  /* compute near/far per ray: per-lane byte offsets (multiples of
     sizeof(float8)) chosen by the sign of rdir; only consumed by occluded1().
     NOTE(review): presumably offsets into the node's bound arrays -- confirm */
  Vec3i8 nearXYZ;
  nearXYZ.x = select(rdir.x >= 0.0f,int8(0*(int)sizeof(float8)),int8(1*(int)sizeof(float8)));
  nearXYZ.y = select(rdir.y >= 0.0f,int8(2*(int)sizeof(float8)),int8(3*(int)sizeof(float8)));
  nearXYZ.z = select(rdir.z >= 0.0f,int8(4*(int)sizeof(float8)),int8(5*(int)sizeof(float8)));

  /* allocate stack and push root node (slot 0 = sentinel, slot 1 = root) */
  float8 stack_near[stackSizeChunk];
  NodeRef stack_node[stackSizeChunk];
  stack_node[0] = BVH8::invalidNode;
  stack_near[0] = inf;
  stack_node[1] = bvh->root;
  stack_near[1] = ray_tnear;
  NodeRef* stackEnd = stack_node+stackSizeChunk;
  NodeRef* __restrict__ sptr_node = stack_node + 2;
  float8* __restrict__ sptr_near = stack_near + 2;

  while (1)
  {
    /* pop next node from stack */
    assert(sptr_node > stack_node);
    sptr_node--;
    sptr_near--;
    NodeRef cur = *sptr_node;
    if (unlikely(cur == BVH8::invalidNode)) {
      assert(sptr_node == stack_node);
      break;
    }

    /* cull node if behind closest hit point */
    float8 curDist = *sptr_near;
    const bool8 active = curDist < ray_tfar;
    if (unlikely(none(active)))
      continue;

    /* switch to single ray traversal when few lanes remain active */
#if !defined(__WIN32__) || defined(__X86_64__)
    size_t bits = movemask(active);
    if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) {
      /* visit each set bit of the active mask in turn */
      for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) {
        if (occluded1(bvh,cur,i,pre,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar,nearXYZ))
          terminated[i] = -1;
      }
      if (all(terminated)) break;
      /* terminated lanes get tfar=-inf so they fail every later test */
      ray_tfar = select(terminated,float8(neg_inf),ray_tfar);
      continue;
    }
#endif

    while (1)
    {
      /* test if this is a leaf node */
      if (unlikely(cur.isLeaf()))
        break;

      const bool8 valid_node = ray_tfar > curDist;
      STAT3(shadow.trav_nodes,1,popcnt(valid_node),8);
      const Node* __restrict__ const node = (Node*)cur.node();

      /* pop of next node: the stack top becomes the provisional "next"
         node that hit children are compared against */
      assert(sptr_node > stack_node);
      sptr_node--;
      sptr_near--;
      cur = *sptr_node;
      curDist = *sptr_near;

      for (unsigned i=0; i<BVH8::N; i++)
      {
        /* the first empty child slot ends the scan */
        const NodeRef child = node->children[i];
        if (unlikely(child == BVH8::emptyNode)) break;

        /* slab test: entry = max of per-axis minima, exit = min of per-axis maxima */
#if defined(__AVX2__)
        const float8 lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
        const float8 lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
        const float8 lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
        const float8 lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
        const float8 lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
        const float8 lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
        const float8 lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
        const float8 lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
        const bool8 lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);
#else
        const float8 lclipMinX = (node->lower_x[i] - org.x) * rdir.x;
        const float8 lclipMinY = (node->lower_y[i] - org.y) * rdir.y;
        const float8 lclipMinZ = (node->lower_z[i] - org.z) * rdir.z;
        const float8 lclipMaxX = (node->upper_x[i] - org.x) * rdir.x;
        const float8 lclipMaxY = (node->upper_y[i] - org.y) * rdir.y;
        const float8 lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z;
        const float8 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
        const float8 lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
        const bool8 lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);
#endif

        /* if we hit the child we choose to continue with that child if it
           is closer than the current next child, or we push it onto the stack */
        if (likely(any(lhit)))
        {
          assert(sptr_node < stackEnd);
          assert(child != BVH8::emptyNode);
          /* lanes that miss the child get distance +inf */
          const float8 childDist = select(lhit,lnearP,inf);
          sptr_node++;
          sptr_near++;

          /* push cur node onto stack and continue with hit child */
          if (any(childDist < curDist))
          {
            *(sptr_node-1) = cur;
            *(sptr_near-1) = curDist;
            curDist = childDist;
            cur = child;
          }

          /* push hit child onto stack */
          else {
            *(sptr_node-1) = child;
            *(sptr_near-1) = childDist;
          }
        }
      }
    }

    /* return if stack is empty (the sentinel was popped inside the inner loop) */
    if (unlikely(cur == BVH8::invalidNode)) {
      assert(sptr_node == stack_node);
      break;
    }

    /* intersect leaf; only lanes not yet terminated are passed on */
    assert(cur != BVH8::emptyNode);
    const bool8 valid_leaf = ray_tfar > curDist;
    STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8);
    size_t items;
    const Triangle* prim = (Triangle*) cur.leaf(items);
    terminated |= PrimitiveIntersector8::occluded(!terminated,pre,ray,prim,items,bvh->scene);
    if (all(terminated)) break;
    ray_tfar = select(terminated,float8(neg_inf),ray_tfar);
  }
  /* report occlusion: geomID is set to 0 for originally valid lanes that terminated */
  store8i(valid & terminated,&ray.geomID,0);
  AVX_ZERO_UPPER();
}
/*! Single-ray traversal of a BVH4i: tests the ray against the four child
 *  boxes of each inner node at once and calls
 *  TriangleIntersector::intersect for every triangle of each leaf reached.
 *
 *  \param This  intersector instance; only This->bvh is read
 *  \param ray   ray to intersect; ray.tfar is re-read after every leaf so
 *               that hits found so far shorten the remaining traversal
 *
 *  Children are visited closest first: with one hit the child is followed
 *  directly, with two the farther one is pushed, with three or four all are
 *  pushed and the top stack entries are sorted. */
void BVH4iIntersector1<TriangleIntersector>::intersect(const BVH4iIntersector1* This, Ray& ray)
{
  AVX_ZERO_UPPER();
  STAT3(normal.travs,1,1,1);

  /*! stack state */
  const BVH4i* bvh = This->bvh;
  StackItem stack[1+3*BVH4i::maxDepth]; //!< stack of nodes
  StackItem* stackPtr = stack+1;        //!< current stack pointer
  stack[0].ptr = bvh->root;
  stack[0].dist = neg_inf;

  /*! offsets to select the side that becomes the lower or upper bound */
  const size_t nearX = ray.dir.x >= 0.0f ? 0*sizeof(ssef_m) : 1*sizeof(ssef_m);
  const size_t nearY = ray.dir.y >= 0.0f ? 2*sizeof(ssef_m) : 3*sizeof(ssef_m);
  const size_t nearZ = ray.dir.z >= 0.0f ? 4*sizeof(ssef_m) : 5*sizeof(ssef_m);

  /*! load the ray into SIMD registers (each component broadcast) */
  const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const Vector3f ray_rdir = rcp_safe(ray.dir);
  const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z);
  const Vector3f ray_org_rdir = ray.org*ray_rdir;
  const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
  const ssef rayNear(ray.tnear);
  ssef rayFar(ray.tfar);

  /* base pointers that node and leaf references are resolved against */
  const void* nodePtr = bvh->nodePtr();
  const void* triPtr = bvh->triPtr();

  /* pop loop; the label lets the inner loop jump straight to the next pop */
  while (true) pop:
  {
    /*! pop next node */
    if (unlikely(stackPtr == stack)) break;
    stackPtr--;
    NodeRef cur = NodeRef(stackPtr->ptr);

    /*! if popped node is too far, pop next one */
    if (unlikely(stackPtr->dist > ray.tfar))
      continue;

    /* downtraversal loop */
    while (true)
    {
      /*! stop if we found a leaf */
      if (unlikely(cur.isLeaf())) break;
      STAT3(normal.trav_nodes,1,1,1);

      /*! single ray intersection with 4 boxes */
      const Node* node = cur.node(nodePtr);
      /* far offset = near offset with bit 4 toggled
         (assumes sizeof(ssef_m) == 16 -- TODO confirm) */
      const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16;
#if defined (__AVX2__)
      const ssef tNearX = msub(ssef((const char*)nodePtr+(size_t)cur+nearX), rdir.x, org_rdir.x);
      const ssef tNearY = msub(ssef((const char*)nodePtr+(size_t)cur+nearY), rdir.y, org_rdir.y);
      const ssef tNearZ = msub(ssef((const char*)nodePtr+(size_t)cur+nearZ), rdir.z, org_rdir.z);
      const ssef tFarX = msub(ssef((const char*)nodePtr+(size_t)cur+farX ), rdir.x, org_rdir.x);
      const ssef tFarY = msub(ssef((const char*)nodePtr+(size_t)cur+farY ), rdir.y, org_rdir.y);
      const ssef tFarZ = msub(ssef((const char*)nodePtr+(size_t)cur+farZ ), rdir.z, org_rdir.z);
#else
      const ssef tNearX = (norg.x + ssef((const char*)nodePtr+(size_t)cur+nearX)) * rdir.x;
      const ssef tNearY = (norg.y + ssef((const char*)nodePtr+(size_t)cur+nearY)) * rdir.y;
      const ssef tNearZ = (norg.z + ssef((const char*)nodePtr+(size_t)cur+nearZ)) * rdir.z;
      const ssef tFarX = (norg.x + ssef((const char*)nodePtr+(size_t)cur+farX )) * rdir.x;
      const ssef tFarY = (norg.y + ssef((const char*)nodePtr+(size_t)cur+farY )) * rdir.y;
      const ssef tFarZ = (norg.z + ssef((const char*)nodePtr+(size_t)cur+farZ )) * rdir.z;
#endif
      const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear);
      const ssef tFar = min(tFarX ,tFarY ,tFarZ ,rayFar);
      /* one bit per child box whose clipped interval is non-empty */
      size_t mask = movemask(tNear <= tFar);

      /*! if no child is hit, pop next node */
      if (unlikely(mask == 0))
        goto pop;

      /*! one child is hit, continue with that child */
      size_t r = __bsf(mask); mask = __btc(mask,r);
      if (likely(mask == 0)) {
        cur = node->child(r);
        continue;
      }

      /*! two children are hit, push far child, and continue with closer child */
      NodeRef c0 = node->child(r); const float d0 = tNear[r];
      r = __bsf(mask); mask = __btc(mask,r);
      NodeRef c1 = node->child(r); const float d1 = tNear[r];
      if (likely(mask == 0)) {
        if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; }
        else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; }
      }

      /*! Here starts the slow path for 3 or 4 hit children. We push
       *  all nodes onto the stack to sort them there. */
      stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
      stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;

      /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
      r = __bsf(mask); mask = __btc(mask,r);
      NodeRef c = node->child(r); float d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
      if (likely(mask == 0)) {
        sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
        /* after sorting, the top entry is taken as the next node */
        cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
        continue;
      }

      /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
      r = __bsf(mask); mask = __btc(mask,r);
      c = node->child(r); d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
      sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
      cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
    }

    /*! this is a leaf node */
    STAT3(normal.trav_leaves,1,1,1);
    size_t num;
    Triangle* tri = (Triangle*) cur.leaf(triPtr,num);
    for (size_t i=0; i<num; i++)
      TriangleIntersector::intersect(ray,tri[i],bvh->vertices);
    /* pick up any shortened ray.tfar for the following box tests */
    rayFar = ray.tfar;
  }
  AVX_ZERO_UPPER();
}
/*! Packet ("chunk") traversal of a BVH8 for a Ray8: all lanes walk the same
 *  node sequence and PrimitiveIntersector8::intersect is invoked on every
 *  leaf that is reached.  The whole body is compiled only when __AVX__ is
 *  defined; otherwise the function does nothing.
 *
 *  \param valid_i  mask of the lanes to process (read once at entry)
 *  \param bvh      hierarchy to traverse; bvh->root and bvh->geometry are used
 *  \param ray      ray packet; ray.tfar is re-read after each leaf
 *
 *  NOTE(review): the stack arrays hold 3*BVH8::maxDepth+1 entries while the
 *  assert after each push checks against BVH8::maxDepth -- confirm which
 *  bound is intended (see also the FIXME at the pre-pop below). */
void BVH8Intersector8Chunk<PrimitiveIntersector8>::intersect(avxb* valid_i, BVH8* bvh, Ray8& ray)
{
#if defined(__AVX__)

  /* load ray; lanes outside valid0 get tnear=+inf and tfar=-inf */
  const avxb valid0 = *valid_i;
  const avx3f rdir = rcp_safe(ray.dir);
  const avx3f org_rdir = ray.org * rdir;
  avxf ray_tnear = select(valid0,ray.tnear,pos_inf);
  avxf ray_tfar = select(valid0,ray.tfar ,neg_inf);
  const avxf inf = avxf(pos_inf);
  Precalculations pre(valid0,ray);

  /* allocate stack and push root node (slot 0 = sentinel, slot 1 = root) */
  avxf stack_near[3*BVH8::maxDepth+1];
  NodeRef stack_node[3*BVH8::maxDepth+1];
  stack_node[0] = BVH8::invalidNode;
  stack_near[0] = inf;
  stack_node[1] = bvh->root;
  stack_near[1] = ray_tnear;
  NodeRef* __restrict__ sptr_node = stack_node + 2;
  avxf* __restrict__ sptr_near = stack_near + 2;

  while (1)
  {
    /* pop next node from stack */
    sptr_node--;
    sptr_near--;
    NodeRef cur = *sptr_node;
    if (unlikely(cur == BVH8::invalidNode))
      break;

    /* cull node if behind closest hit point */
    avxf curDist = *sptr_near;
    if (unlikely(none(ray_tfar > curDist)))
      continue;

    while (1)
    {
      /* test if this is a leaf node */
      if (unlikely(cur.isLeaf()))
        break;

      const avxb valid_node = ray_tfar > curDist;
      STAT3(normal.trav_nodes,1,popcnt(valid_node),8);
      const Node* __restrict__ const node = (BVH8::Node*)cur.node();

      /* pop of next node: the stack top becomes the provisional "next"
         node that hit children are compared against */
      sptr_node--;
      sptr_near--;
      cur = *sptr_node; // FIXME: this trick creates issues with stack depth
      curDist = *sptr_near;

      for (unsigned i=0; i<BVH8::N; i++)
      {
        /* the first empty child slot ends the scan */
        const NodeRef child = node->children[i];
        if (unlikely(child == BVH8::emptyNode)) break;

        /* slab test: entry = max of per-axis minima, exit = min of per-axis maxima */
#if defined(__AVX2__)
        const avxf lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
        const avxf lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
        const avxf lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
        const avxf lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
        const avxf lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
        const avxf lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
        const avxf lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
        const avxf lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
        const avxb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);
#else
        const avxf lclipMinX = node->lower_x[i] * rdir.x - org_rdir.x;
        const avxf lclipMinY = node->lower_y[i] * rdir.y - org_rdir.y;
        const avxf lclipMinZ = node->lower_z[i] * rdir.z - org_rdir.z;
        const avxf lclipMaxX = node->upper_x[i] * rdir.x - org_rdir.x;
        const avxf lclipMaxY = node->upper_y[i] * rdir.y - org_rdir.y;
        const avxf lclipMaxZ = node->upper_z[i] * rdir.z - org_rdir.z;
        const avxf lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
        const avxf lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
        const avxb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);
#endif

        /* if we hit the child we choose to continue with that child if it
           is closer than the current next child, or we push it onto the stack */
        if (likely(any(lhit)))
        {
          /* lanes that miss the child get distance +inf */
          const avxf childDist = select(lhit,lnearP,inf);
          const NodeRef child = node->children[i];

          /* push cur node onto stack and continue with hit child */
          if (any(childDist < curDist))
          {
            *sptr_node = cur;
            *sptr_near = curDist;
            sptr_node++;
            sptr_near++;
            curDist = childDist;
            cur = child;
          }

          /* push hit child onto stack*/
          else {
            *sptr_node = child;
            *sptr_near = childDist;
            sptr_node++;
            sptr_near++;
          }
          assert(sptr_node - stack_node < BVH8::maxDepth);
        }
      }
    }

    /* return if stack is empty (the sentinel was popped inside the inner loop) */
    if (unlikely(cur == BVH8::invalidNode))
      break;

    /* intersect leaf */
    assert(cur != BVH8::emptyNode);
    const avxb valid_leaf = ray_tfar > curDist;
    STAT3(normal.trav_leaves,1,popcnt(valid_leaf),8);
    size_t items;
    const Triangle* tri = (Triangle*) cur.leaf(items);
    PrimitiveIntersector8::intersect(valid_leaf,pre,ray,tri,items,bvh->geometry);
    /* refresh the far distance from the ray for the lanes that were tested */
    ray_tfar = select(valid_leaf,ray.tfar,ray_tfar);
  }
  AVX_ZERO_UPPER();
#endif
}
/*! Packet ("chunk") occlusion traversal of a BVH4i for a Ray4: all lanes
 *  walk the same node sequence and TriangleIntersector4::occluded is
 *  invoked on every leaf reached until all lanes are terminated.
 *
 *  \param valid_i  mask of the lanes to process (read once at entry)
 *  \param bvh      hierarchy to traverse; nodePtr(), triPtr(), root and
 *                  geometry are used
 *  \param ray      ray packet; on exit store4i() writes 0 to ray.geomID under
 *                  the mask (valid & terminated) -- presumably a masked store
 *
 *  NOTE(review): the stack arrays hold 3*BVH4i::maxDepth+1 entries while the
 *  assert after each push checks against BVH4i::maxDepth -- confirm which
 *  bound is intended (see also the FIXME at the pre-pop below). */
void BVH4iIntersector4Chunk<TriangleIntersector4>::occluded(sseb* valid_i, BVH4i* bvh, Ray4& ray)
{
  /* load node and primitive array */
  const Node * __restrict__ nodes = (Node *)bvh->nodePtr();
  const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr();

  /* load ray; lanes outside valid start out terminated and get
     tnear=+inf and tfar=-inf */
  const sseb valid = *valid_i;
  sseb terminated = !valid;
  const sse3f rdir = rcp_safe(ray.dir);
  const sse3f org_rdir = ray.org * rdir;
  ssef ray_tnear = select(valid,ray.tnear,pos_inf);
  ssef ray_tfar = select(valid,ray.tfar ,neg_inf);
  const ssef inf = ssef(pos_inf);

  /* allocate stack and push root node (slot 0 = sentinel, slot 1 = root) */
  ssef stack_near[3*BVH4i::maxDepth+1];
  NodeRef stack_node[3*BVH4i::maxDepth+1];
  stack_node[0] = BVH4i::invalidNode;
  stack_near[0] = inf;
  stack_node[1] = bvh->root;
  stack_near[1] = ray_tnear;
  NodeRef* __restrict__ sptr_node = stack_node + 2;
  ssef* __restrict__ sptr_near = stack_near + 2;

  while (1)
  {
    /* pop next node from stack */
    sptr_node--;
    sptr_near--;
    NodeRef curNode = *sptr_node;
    if (unlikely(curNode == BVH4i::invalidNode))
      break;

    /* cull node if behind closest hit point */
    ssef curDist = *sptr_near;
    if (unlikely(none(ray_tfar > curDist)))
      continue;

    while (1)
    {
      /* test if this is a leaf node */
      if (unlikely(curNode.isLeaf()))
        break;

      const sseb valid_node = ray_tfar > curDist;
      STAT3(shadow.trav_nodes,1,popcnt(valid_node),4);
      const Node* __restrict__ const node = curNode.node(nodes);

      /* pop of next node: the stack top becomes the provisional "next"
         node that hit children are compared against */
      sptr_node--;
      sptr_near--;
      curNode = *sptr_node; // FIXME: this trick creates issues with stack depth
      curDist = *sptr_near;

#pragma unroll(4)
      for (unsigned i=0; i<4; i++)
      {
        /* the first empty child slot ends the scan */
        const NodeRef child = node->children[i];
        if (unlikely(child == BVH4i::emptyNode)) break;

        /* slab test: entry = max of per-axis minima, exit = min of per-axis maxima */
#if defined(__AVX2__)
        const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
        const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
        const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
        const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
        const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
        const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
        const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
        const ssef lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
        const sseb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);
#else
        const ssef lclipMinX = node->lower_x[i] * rdir.x - org_rdir.x;
        const ssef lclipMinY = node->lower_y[i] * rdir.y - org_rdir.y;
        const ssef lclipMinZ = node->lower_z[i] * rdir.z - org_rdir.z;
        const ssef lclipMaxX = node->upper_x[i] * rdir.x - org_rdir.x;
        const ssef lclipMaxY = node->upper_y[i] * rdir.y - org_rdir.y;
        const ssef lclipMaxZ = node->upper_z[i] * rdir.z - org_rdir.z;
        const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
        const ssef lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
        const sseb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);
#endif

        /* if we hit the child we choose to continue with that child if it
           is closer than the current next child, or we push it onto the stack */
        if (likely(any(lhit)))
        {
          /* lanes that miss the child get distance +inf */
          const ssef childDist = select(lhit,lnearP,inf);
          sptr_node++;
          sptr_near++;

          /* push cur node onto stack and continue with hit child */
          if (any(childDist < curDist))
          {
            *(sptr_node-1) = curNode;
            *(sptr_near-1) = curDist;
            curDist = childDist;
            curNode = child;
          }

          /* push hit child onto stack*/
          else {
            *(sptr_node-1) = child;
            *(sptr_near-1) = childDist;
          }
          assert(sptr_node - stack_node < BVH4i::maxDepth);
        }
      }
    }

    /* return if stack is empty (the sentinel was popped inside the inner loop) */
    if (unlikely(curNode == BVH4i::invalidNode))
      break;

    /* intersect leaf; only lanes not yet terminated are passed on */
    const sseb valid_leaf = ray_tfar > curDist;
    STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),4);
    size_t items;
    const Triangle* tri = (Triangle*) curNode.leaf(accel, items);
    terminated |= TriangleIntersector4::occluded(!terminated,ray,tri,items,bvh->geometry);
    if (all(terminated)) break;
    /* terminated lanes get tfar=-inf so they fail every later test */
    ray_tfar = select(terminated,neg_inf,ray_tfar);
  }
  /* report occlusion: geomID is set to 0 for originally valid lanes that terminated */
  store4i(valid & terminated,&ray.geomID,0);
  AVX_ZERO_UPPER();
}
/*! Hybrid occlusion (shadow-ray) traversal of a BVH8i for a Ray8 packet.
 *
 *  The packet is traversed in lock-step; whenever the number of lanes still
 *  active at a popped node is at most SWITCH_THRESHOLD, those lanes are
 *  traversed one by one through occluded1() starting at that node.
 *
 *  \param valid_i  mask of the lanes to process (read once at entry)
 *  \param bvh      hierarchy to traverse; nodePtr(), triPtr(), root and
 *                  geometry are used
 *  \param ray      ray packet; on exit store8i() writes 0 to ray.geomID under
 *                  the mask (valid & terminated) -- presumably a masked store
 *
 *  Sentinel and empty-slot markers are taken from BVH4i (BVH4i::invalidNode,
 *  BVH4i::emptyNode) even though the tree type is BVH8i.
 *  NOTE(review): presumably the two share these constants -- confirm. */
void BVH8iIntersector8Hybrid<TriangleIntersector8>::occluded(avxb* valid_i, BVH8i* bvh, Ray8& ray)
{
  /* load ray */
  const avxb valid = *valid_i;
  avxb terminated = !valid;
  avx3f ray_org = ray.org, ray_dir = ray.dir;
  avxf ray_tnear = ray.tnear, ray_tfar = ray.tfar;
#if defined(__FIX_RAYS__)
  /* optional sanitising of the inputs: clamp origin/direction to
     +-0.1*FLT_MAX and bound tnear/tfar.  `inf` here is an outer-scope name,
     since the local `inf` is declared only further below */
  const avxf float_range = 0.1f*FLT_MAX;
  ray_org = clamp(ray_org,avx3f(-float_range),avx3f(+float_range));
  ray_dir = clamp(ray_dir,avx3f(-float_range),avx3f(+float_range));
  ray_tnear = max(ray_tnear,FLT_MIN);
  ray_tfar = min(ray_tfar,float(inf));
#endif
  const avx3f rdir = rcp_safe(ray_dir);
  const avx3f org(ray_org), org_rdir = org * rdir;
  /* lanes outside valid get tnear=+inf and tfar=-inf */
  ray_tnear = select(valid,ray_tnear,avxf(pos_inf));
  ray_tfar = select(valid,ray_tfar ,avxf(neg_inf));
  const avxf inf = avxf(pos_inf);

  /* compute near/far per ray: per-lane byte offsets (multiples of
     sizeof(avxf)) chosen by the sign of rdir; only consumed by occluded1() */
  avx3i nearXYZ;
  nearXYZ.x = select(rdir.x >= 0.0f,avxi(0*(int)sizeof(avxf)),avxi(1*(int)sizeof(avxf)));
  nearXYZ.y = select(rdir.y >= 0.0f,avxi(2*(int)sizeof(avxf)),avxi(3*(int)sizeof(avxf)));
  nearXYZ.z = select(rdir.z >= 0.0f,avxi(4*(int)sizeof(avxf)),avxi(5*(int)sizeof(avxf)));

  /* allocate stack and push root node (slot 0 = sentinel, slot 1 = root) */
  avxf stack_near[stackSizeChunk];
  NodeRef stack_node[stackSizeChunk];
  stack_node[0] = BVH4i::invalidNode;
  stack_near[0] = inf;
  stack_node[1] = bvh->root;
  stack_near[1] = ray_tnear;
  NodeRef* stackEnd = stack_node+stackSizeChunk;
  NodeRef* __restrict__ sptr_node = stack_node + 2;
  avxf* __restrict__ sptr_near = stack_near + 2;

  /* base arrays that node and leaf references are resolved against */
  const Node * __restrict__ nodes = (Node *)bvh->nodePtr();
  const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr();

  while (1)
  {
    /* pop next node from stack */
    assert(sptr_node > stack_node);
    sptr_node--;
    sptr_near--;
    NodeRef curNode = *sptr_node;
    if (unlikely(curNode == BVH4i::invalidNode)) {
      assert(sptr_node == stack_node);
      break;
    }

    /* cull node if behind closest hit point */
    avxf curDist = *sptr_near;
    const avxb active = curDist < ray_tfar;
    if (unlikely(none(active)))
      continue;

    /* switch to single ray traversal when few lanes remain active */
#if !defined(__WIN32__) || defined(__X86_64__)
    size_t bits = movemask(active);
    if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) {
      /* visit each set bit of the active mask in turn */
      for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) {
        if (occluded1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar,nearXYZ))
          terminated[i] = -1;
      }
      if (all(terminated)) break;
      /* terminated lanes get tfar=-inf so they fail every later test */
      ray_tfar = select(terminated,avxf(neg_inf),ray_tfar);
      continue;
    }
#endif

    while (1)
    {
      /* test if this is a leaf node */
      if (unlikely(curNode.isLeaf()))
        break;

      const avxb valid_node = ray_tfar > curDist;
      STAT3(shadow.trav_nodes,1,popcnt(valid_node),8);
      const Node* __restrict__ const node = (Node*)curNode.node(nodes);

      /* pop of next node: the stack top becomes the provisional "next"
         node that hit children are compared against */
      assert(sptr_node > stack_node);
      sptr_node--;
      sptr_near--;
      curNode = *sptr_node;
      curDist = *sptr_near;

      for (unsigned i=0; i<8; i++)
      {
        /* the first empty child slot ends the scan */
        const NodeRef child = node->children[i];
        if (unlikely(child == BVH4i::emptyNode)) break;

        /* slab test: entry = max of per-axis minima, exit = min of per-axis maxima */
#if defined(__AVX2__)
        const avxf lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
        const avxf lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
        const avxf lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
        const avxf lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
        const avxf lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
        const avxf lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
        const avxf lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
        const avxf lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
        const avxb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);
#else
        const avxf lclipMinX = (node->lower_x[i] - org.x) * rdir.x;
        const avxf lclipMinY = (node->lower_y[i] - org.y) * rdir.y;
        const avxf lclipMinZ = (node->lower_z[i] - org.z) * rdir.z;
        const avxf lclipMaxX = (node->upper_x[i] - org.x) * rdir.x;
        const avxf lclipMaxY = (node->upper_y[i] - org.y) * rdir.y;
        const avxf lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z;
        const avxf lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
        const avxf lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
        const avxb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);
#endif

        /* if we hit the child we choose to continue with that child if it
           is closer than the current next child, or we push it onto the stack */
        if (likely(any(lhit)))
        {
          assert(sptr_node < stackEnd);
          assert(child != BVH4i::emptyNode);
          /* lanes that miss the child get distance +inf */
          const avxf childDist = select(lhit,lnearP,inf);
          sptr_node++;
          sptr_near++;

          /* push cur node onto stack and continue with hit child */
          if (any(childDist < curDist))
          {
            *(sptr_node-1) = curNode;
            *(sptr_near-1) = curDist;
            curDist = childDist;
            curNode = child;
          }

          /* push hit child onto stack */
          else {
            *(sptr_node-1) = child;
            *(sptr_near-1) = childDist;
          }
        }
      }
    }

    /* return if stack is empty (the sentinel was popped inside the inner loop) */
    if (unlikely(curNode == BVH4i::invalidNode)) {
      assert(sptr_node == stack_node);
      break;
    }

    /* intersect leaf; only lanes not yet terminated are passed on */
    const avxb valid_leaf = ray_tfar > curDist;
    STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),8);
    size_t items;
    const Triangle* prim = (Triangle*) curNode.leaf(accel,items);
    terminated |= TriangleIntersector8::occluded(!terminated,ray,prim,items,bvh->geometry);
    if (all(terminated)) break;
    ray_tfar = select(terminated,avxf(neg_inf),ray_tfar);
  }
  /* report occlusion: geomID is set to 0 for originally valid lanes that terminated */
  store8i(valid & terminated,&ray.geomID,0);
  AVX_ZERO_UPPER();
}
/*
 * One randomized round of a strong-pseudoprime style test on n.
 *
 * A random base x with x mod n not equal to 0 or 1 is drawn, y = x^q mod n
 * is computed, and y is then squared modulo n up to k - 1 times.
 *
 *   n  number under test (not modified)
 *   q  exponent for the initial modular power; presumably the odd part
 *      of n - 1, with k the matching power of two -- confirm at the caller
 *   k  bound on the number of "y == n - 1" checks
 *
 * Returns 1 when y == 1 right after the exponentiation or when n - y == 1
 * at any check; returns 0 when k checks were exhausted or a squaring
 * produced y == 1.  Every temporary allocated here is released before
 * returning.
 */
static int
mprimef(unsigned int *n, unsigned int *q, int k)
{
	unsigned int *scratch, *base, *power;
	int word, round, minus_one, verdict;

	/* draw random words until the residue modulo n is neither 0 nor 1 */
	scratch = mcopy(n);
	for (;;) {
		for (word = 0; word < MLENGTH(scratch); word++)
			scratch[word] = rand();
		base = mmod(scratch, n);
		if (!MZERO(base) && !MEQUAL(base, 1))
			break;
		mfree(base);
	}
	mfree(scratch);

	/* power = base ^ q mod n */
	power = mmodpow(base, q, n);

	if (MEQUAL(power, 1)) {
		verdict = 1;
	} else {
		for (round = 1; ; round++) {
			/* is power == n - 1 ? */
			scratch = msub(n, power);
			minus_one = MEQUAL(scratch, 1);
			mfree(scratch);
			if (minus_one) {
				verdict = 1;
				break;
			}

			/* give up after k checks */
			if (round == k) {
				verdict = 0;
				break;
			}

			/* power = power ^ 2 mod n */
			scratch = mmul(power, power);
			mfree(power);
			power = mmod(scratch, n);
			mfree(scratch);

			/* reaching 1 without passing through n - 1 fails the test */
			if (MEQUAL(power, 1)) {
				verdict = 0;
				break;
			}
		}
	}

	mfree(base);
	mfree(power);
	return verdict;
}
__forceinline bool BVH4Intersector4Hybrid<PrimitiveIntersector4>::occluded1(const BVH4* bvh, NodeRef root, size_t k, Ray4& ray, const sse3f& ray_org, const sse3f& ray_dir, const sse3f& ray_rdir, const ssef& ray_tnear, const ssef& ray_tfar) { /*! stack state */ NodeRef stack[stackSizeSingle]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSizeSingle; stack[0] = root; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_dir.x[k] >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray_dir.y[k] >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray_dir.z[k] >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef); /*! load the ray into SIMD registers */ const sse3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const sse3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const sse3f norg = -org, org_rdir(org*rdir); const ssef rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const float d0 = tNear[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const float d1 = tNear[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); assert(cur != BVH4::emptyNode); if (likely(mask == 0)) continue; assert(stackPtr < stackEnd); *stackPtr = cur; stackPtr++; /*! four children are hit */ cur = node->child(3); assert(cur != BVH4::emptyNode); } /*! this is a leaf node */ STAT3(shadow.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); if (PrimitiveIntersector4::occluded(ray,k,prim,num,bvh->geometry)) { ray.geomID[k] = 0; return true; } } return false; }
__forceinline void BVH4Intersector4Hybrid<PrimitiveIntersector4>::intersect1(const BVH4* bvh, NodeRef root, size_t k, Ray4& ray, const sse3f& ray_org, const sse3f& ray_dir, const sse3f& ray_rdir, const ssef& ray_tnear, const ssef& ray_tfar) { /*! stack state */ StackItem stack[stackSizeSingle]; //!< stack of nodes StackItem* stackPtr = stack+1; //!< current stack pointer StackItem* stackEnd = stack+stackSizeSingle; stack[0].ptr = root; stack[0].dist = neg_inf; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_dir.x[k] >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray_dir.y[k] >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray_dir.z[k] >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef); /*! load the ray into SIMD registers */ const sse3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const sse3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const sse3f norg = -org, org_rdir(org*rdir); ssef rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(stackPtr->dist > ray.tfar[k])) continue; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const float d0 = tNear[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const float d1 = tNear[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); float d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); c = node->child(r); d = tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! this is a leaf node */ STAT3(normal.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); PrimitiveIntersector4::intersect(ray,k,prim,num,bvh->geometry); rayFar = ray.tfar[k]; } }
void BVH4Intersector4Hybrid<PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray) { /* load ray */ const sseb valid0 = *valid_i; sse3f ray_org = ray.org, ray_dir = ray.dir; ssef ray_tnear = ray.tnear, ray_tfar = ray.tfar; #if defined(__FIX_RAYS__) const ssef float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,sse3f(-float_range),sse3f(+float_range)); ray_dir = clamp(ray_dir,sse3f(-float_range),sse3f(+float_range)); ray_tnear = max(ray_tnear,FLT_MIN); ray_tfar = min(ray_tfar,float(inf)); #endif const sse3f rdir = rcp_safe(ray_dir); const sse3f org(ray_org), org_rdir = org * rdir; ray_tnear = select(valid0,ray_tnear,ssef(pos_inf)); ray_tfar = select(valid0,ray_tfar ,ssef(neg_inf)); const ssef inf = ssef(pos_inf); /* allocate stack and push root node */ ssef stack_near[stackSizeChunk]; NodeRef stack_node[stackSizeChunk]; stack_node[0] = BVH4::invalidNode; stack_near[0] = inf; stack_node[1] = bvh->root; stack_near[1] = ray_tnear; NodeRef* stackEnd = stack_node+stackSizeChunk; NodeRef* __restrict__ sptr_node = stack_node + 2; ssef* __restrict__ sptr_near = stack_near + 2; while (1) { /* pop next node from stack */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; NodeRef curNode = *sptr_node; if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* cull node if behind closest hit point */ ssef curDist = *sptr_near; const sseb active = curDist < ray_tfar; if (unlikely(none(active))) continue; /* switch to single ray traversal */ #if !defined(__WIN32__) || defined(__X86_64__) size_t bits = movemask(active); if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) { for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) { intersect1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar); } ray_tfar = ray.tfar; continue; } #endif while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf())) break; const sseb valid_node = ray_tfar > curDist; 
STAT3(normal.trav_nodes,1,popcnt(valid_node),4); const Node* __restrict__ const node = curNode.node(); /* pop of next node */ assert(sptr_node > stack_node); sptr_node--; sptr_near--; curNode = *sptr_node; curDist = *sptr_near; #pragma unroll(4) for (unsigned i=0; i<4; i++) { const NodeRef child = node->children[i]; if (unlikely(child == BVH4::emptyNode)) break; #if defined(__AVX2__) const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x); const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y); const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z); const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x); const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y); const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z); #else const ssef lclipMinX = (node->lower_x[i] - org.x) * rdir.x; const ssef lclipMinY = (node->lower_y[i] - org.y) * rdir.y; const ssef lclipMinZ = (node->lower_z[i] - org.z) * rdir.z; const ssef lclipMaxX = (node->upper_x[i] - org.x) * rdir.x; const ssef lclipMaxY = (node->upper_y[i] - org.y) * rdir.y; const ssef lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z; #endif #if defined(__SSE4_1__) const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ)); const ssef lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ)); const sseb lhit = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar); #else const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const ssef lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const sseb lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); #endif /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { assert(sptr_node < stackEnd); const ssef childDist = 
select(lhit,lnearP,inf); const NodeRef child = node->children[i]; assert(child != BVH4::emptyNode); sptr_node++; sptr_near++; /* push cur node onto stack and continue with hit child */ if (any(childDist < curDist)) { *(sptr_node-1) = curNode; *(sptr_near-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack */ else { *(sptr_node-1) = child; *(sptr_near-1) = childDist; } } } } /* return if stack is empty */ if (unlikely(curNode == BVH4::invalidNode)) { assert(sptr_node == stack_node); break; } /* intersect leaf */ const sseb valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(valid_leaf),4); size_t items; const Primitive* prim = (Primitive*) curNode.leaf(items); PrimitiveIntersector4::intersect(valid_leaf,ray,prim,items,bvh->geometry); ray_tfar = select(valid_leaf,ray.tfar,ray_tfar); } AVX_ZERO_UPPER(); }
/*! Closest-hit traversal of a single ray through the BVH4.
 *
 *  Near and far slab distances of all four children are evaluated together
 *  in one 8-wide register per axis: the far half is kept negated (see the
 *  sign-flipped reciprocal direction and the negated tfar), so a single
 *  4-operand max() yields tNear in one half and -tFar in the other.
 *  swapX/swapY/swapZ select, per axis, whether the two halves are exchanged.
 *
 *  Hit triangles update `ray` through TriangleIntersector::intersect; the
 *  far clip is refreshed from ray.tfar after every leaf. */
__forceinline void intersectT(const BVH4* bvh, Ray& ray)
{
  typedef typename TriangleIntersector::Triangle Triangle;
  typedef StackItemT<size_t> StackItem;
  typedef typename BVH4::NodeRef NodeRef;
  typedef typename BVH4::Node Node;

  /*! traversal stack, seeded with the root at distance -inf */
  StackItem todo[1+3*BVH4::maxDepth];
  StackItem* top = todo+1;
  todo[0].ptr  = bvh->root;
  todo[0].dist = neg_inf;

  /*! sign masks that negate one half of an 8-wide value */
  const avxf plusMinus = avxf(ssef(+0.0f),ssef(-0.0f));
  const avxf minusPlus = avxf(ssef(-0.0f),ssef(+0.0f));
  const avxf signX = swapX ? minusPlus : plusMinus;
  const avxf signY = swapY ? minusPlus : plusMinus;
  const avxf signZ = swapZ ? minusPlus : plusMinus;

  /*! load the ray into SIMD registers */
  const Vector3f ray_rdir = rcp_safe(ray.dir);
  const avx3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const avx3f rdir(ray_rdir.x^signX,ray_rdir.y^signY,ray_rdir.z^signZ);
  const avx3f org_rdir(avx3f(ray.org.x,ray.org.y,ray.org.z)*rdir);
  avxf nearFarClip(ssef(ray.tnear),-ssef(ray.tfar));

  const void* nodeBase = bvh->nodePtr();
  const void* triBase  = bvh->triPtr();

  /* outer loop: take the next pending node; `pop` is the restart target
     used when an inner node has no hit children */
  for (;;) pop:
  {
    if (unlikely(top == todo)) break;
    --top;
    NodeRef cur = NodeRef(top->ptr);

    /*! skip entries that lie behind the closest hit found so far */
    if (unlikely(top->dist > ray.tfar)) continue;

    /* inner loop: descend until a leaf is reached */
    for (;;)
    {
      if (unlikely(cur.isLeaf())) break;
      STAT3(normal.trav_nodes,1,1,1);

      /*! slab test of the ray against the four child boxes */
      const Node* node = cur.node(nodeBase);
#if defined (__AVX2__) || defined(__MIC__)
      const avxf tLowUpX = msub(avxf::load(&node->lower_x), rdir.x, org_rdir.x);
      const avxf tLowUpY = msub(avxf::load(&node->lower_y), rdir.y, org_rdir.y);
      const avxf tLowUpZ = msub(avxf::load(&node->lower_z), rdir.z, org_rdir.z);
#else
      const avxf tLowUpX = (norg.x + avxf::load(&node->lower_x)) * rdir.x;
      const avxf tLowUpY = (norg.y + avxf::load(&node->lower_y)) * rdir.y;
      const avxf tLowUpZ = (norg.z + avxf::load(&node->lower_z)) * rdir.z;
#endif
      const avxf tNearFarX = swapX ? shuffle<1,0>(tLowUpX) : tLowUpX;
      const avxf tNearFarY = swapY ? shuffle<1,0>(tLowUpY) : tLowUpY;
      const avxf tNearFarZ = swapZ ? shuffle<1,0>(tLowUpZ) : tLowUpZ;
      const avxf tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,nearFarClip);
      const ssef tNear = extract<0>(tNearFar);
      const ssef tFar  = extract<1>(tNearFar);
      size_t hits = movemask(-tNear >= tFar);

      /*! nothing hit: take the next pending node */
      if (unlikely(hits == 0)) goto pop;

      /*! exactly one hit: descend into it */
      size_t slot = __bsf(hits);
      hits = __btc(hits,slot);
      if (likely(hits == 0)) {
        cur = node->child(slot);
        continue;
      }

      /*! exactly two hits: defer the farther one, descend into the nearer */
      NodeRef c0 = node->child(slot);
      const float d0 = tNear[slot];
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef c1 = node->child(slot);
      const float d1 = tNear[slot];
      if (likely(hits == 0)) {
        if (d0 < d1) { top->ptr = c1; top->dist = d1; cur = c0; }
        else         { top->ptr = c0; top->dist = d0; cur = c1; }
        ++top;
        continue;
      }

      /*! slow path for three or four hits: push every hit child, sort the
       *  pushed items on the stack, then continue with the top one */
      top->ptr = c0; top->dist = d0; ++top;
      top->ptr = c1; top->dist = d1; ++top;

      /*! third hit */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef c = node->child(slot);
      float d = tNear[slot];
      top->ptr = c; top->dist = d; ++top;
      if (likely(hits == 0)) {
        sort(top[-1],top[-2],top[-3]);
        --top;
        cur = (NodeRef) top->ptr;
        continue;
      }

      /*! fourth hit */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      c = node->child(slot);
      d = tNear[slot];
      top->ptr = c; top->dist = d; ++top;
      sort(top[-1],top[-2],top[-3],top[-4]);
      --top;
      cur = (NodeRef) top->ptr;
    }

    /*! leaf reached: intersect every triangle, then tighten the far clip */
    STAT3(normal.trav_leaves,1,1,1);
    size_t count;
    Triangle* leafTris = (Triangle*) cur.leaf(triBase,count);
    for (size_t t=0; t<count; t++)
      TriangleIntersector::intersect(ray,leafTris[t],bvh->vertices);
    nearFarClip = insert<1>(nearFarClip,-ssef(ray.tfar));
  }
}
/*! Occlusion query of a single ray against the BVH4.
 *
 *  Uses the same combined near/far box test as the closest-hit variant:
 *  the far half of each 8-wide distance is kept negated so one 4-operand
 *  max() yields tNear in one half and -tFar in the other.  Children are
 *  not distance-sorted beyond the two-hit case.
 *
 *  \return true as soon as TriangleIntersector::occluded accepts a
 *          triangle, false once the stack runs empty.  AVX_ZERO_UPPER()
 *          is issued on both exits. */
__forceinline bool occludedT(const BVH4* bvh, Ray& ray)
{
  typedef typename TriangleIntersector::Triangle Triangle;
  typedef StackItemT<size_t> StackItem;
  typedef typename BVH4::NodeRef NodeRef;
  typedef typename BVH4::Node Node;

  /*! traversal stack, seeded with the root */
  NodeRef todo[1+3*BVH4::maxDepth];
  NodeRef* top = todo+1;
  todo[0] = bvh->root;

  /*! sign masks that negate one half of an 8-wide value */
  const avxf plusMinus = avxf(ssef(+0.0f),ssef(-0.0f));
  const avxf minusPlus = avxf(ssef(-0.0f),ssef(+0.0f));
  const avxf signX = swapX ? minusPlus : plusMinus;
  const avxf signY = swapY ? minusPlus : plusMinus;
  const avxf signZ = swapZ ? minusPlus : plusMinus;

  /*! load the ray into SIMD registers */
  const avx3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const Vector3f ray_rdir = rcp_safe(ray.dir);
  const avx3f rdir(ray_rdir.x^signX,ray_rdir.y^signY,ray_rdir.z^signZ);
  const avx3f org_rdir(avx3f(ray.org.x,ray.org.y,ray.org.z)*rdir);
  const avxf nearFarClip(ssef(ray.tnear),-ssef(ray.tfar));

  const void* nodeBase = bvh->nodePtr();
  const void* triBase  = bvh->triPtr();

  /* outer loop: take the next pending node; `pop` is the restart target
     used when an inner node has no hit children */
  for (;;) pop:
  {
    if (unlikely(top == todo)) break;
    --top;
    NodeRef cur = (NodeRef) *top;

    /* inner loop: descend until a leaf is reached */
    for (;;)
    {
      if (unlikely(cur.isLeaf())) break;
      STAT3(shadow.trav_nodes,1,1,1);

      /*! slab test of the ray against the four child boxes */
      const Node* node = cur.node(nodeBase);
#if defined (__AVX2__) || defined(__MIC__)
      const avxf tLowUpX = msub(avxf::load(&node->lower_x), rdir.x, org_rdir.x);
      const avxf tLowUpY = msub(avxf::load(&node->lower_y), rdir.y, org_rdir.y);
      const avxf tLowUpZ = msub(avxf::load(&node->lower_z), rdir.z, org_rdir.z);
#else
      const avxf tLowUpX = (norg.x + avxf::load(&node->lower_x)) * rdir.x;
      const avxf tLowUpY = (norg.y + avxf::load(&node->lower_y)) * rdir.y;
      const avxf tLowUpZ = (norg.z + avxf::load(&node->lower_z)) * rdir.z;
#endif
      const avxf tNearFarX = swapX ? shuffle<1,0>(tLowUpX) : tLowUpX;
      const avxf tNearFarY = swapY ? shuffle<1,0>(tLowUpY) : tLowUpY;
      const avxf tNearFarZ = swapZ ? shuffle<1,0>(tLowUpZ) : tLowUpZ;
      const avxf tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,nearFarClip);
      const ssef tNear = extract<0>(tNearFar);
      const ssef tFar  = extract<1>(tNearFar);
      size_t hits = movemask(-tNear >= tFar);

      /*! nothing hit: take the next pending node */
      if (unlikely(hits == 0)) goto pop;

      /*! exactly one hit: descend into it */
      size_t slot = __bsf(hits);
      hits = __btc(hits,slot);
      if (likely(hits == 0)) {
        cur = node->child(slot);
        continue;
      }

      /*! exactly two hits: defer the farther one, descend into the nearer */
      NodeRef c0 = node->child(slot);
      const float d0 = tNear[slot];
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef c1 = node->child(slot);
      const float d1 = tNear[slot];
      if (likely(hits == 0)) {
        if (d0 < d1) { *top = c1; cur = c0; }
        else         { *top = c0; cur = c1; }
        ++top;
        continue;
      }

      /*! three or four hits: defer the first two without ordering */
      *top = c0; ++top;
      *top = c1; ++top;

      /*! third hit: stored on the stack first; with exactly three hits the
       *  slot is released again and the child is visited directly */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      cur = node->child(slot);
      *top = cur; ++top;
      if (likely(hits == 0)) {
        --top;
        continue;
      }

      /*! four hits: the remaining child is index 3 */
      cur = node->child(3);
    }

    /*! leaf reached: the first occluding triangle ends the query */
    STAT3(shadow.trav_leaves,1,1,1);
    size_t count;
    Triangle* leafTris = (Triangle*) cur.leaf(triBase,count);
    for (size_t t=0; t<count; t++) {
      if (TriangleIntersector::occluded(ray,leafTris[t],bvh->vertices)) {
        AVX_ZERO_UPPER();
        return true;
      }
    }
  }
  AVX_ZERO_UPPER();
  return false;
}
/*
 * Search for a factor of n using only additions and subtractions.
 *
 * With s = msqrt(n), the state starts at
 *     x = 2 s + 1,   y = 1,   r = s^2 - n
 * and is advanced (r += x, x += 2, then repeatedly r -= y, y += 2 until r
 * is negative or zero) until r becomes exactly zero.  The value returned
 * is then (x - y) / 2.
 *
 * n is not modified.  The result is a newly allocated number produced by
 * mdiv(); all other temporaries are released before returning.
 */
unsigned int *
mfactor(unsigned int *n)
{
	unsigned int *step, *isq, *xodd, *yodd, *residue, *tmp, *factor;

	step = mint(2);
	isq = msqrt(n);

	/* yodd = 1 */
	yodd = mint(1);

	/* xodd = 2 isqrt(n) + 1 */
	tmp = madd(isq, isq);
	xodd = madd(tmp, yodd);
	mfree(tmp);

	/* residue = isqrt(n) ^ 2 - n */
	tmp = mmul(isq, isq);
	residue = msub(tmp, n);
	mfree(tmp);
	mfree(isq);

	while (!MZERO(residue)) {

		/* residue += xodd */
		tmp = madd(residue, xodd);
		mfree(residue);
		residue = tmp;

		/* xodd += 2 */
		tmp = madd(xodd, step);
		mfree(xodd);
		xodd = tmp;

		/* subtract successive odd numbers until residue <= 0 */
		do {
			/* residue -= yodd */
			tmp = msub(residue, yodd);
			mfree(residue);
			residue = tmp;

			/* yodd += 2 */
			tmp = madd(yodd, step);
			mfree(yodd);
			yodd = tmp;
		} while (!(MSIGN(residue) == -1 || MZERO(residue)));
	}

	/* factor = (xodd - yodd) / 2 */
	tmp = msub(xodd, yodd);
	factor = mdiv(tmp, step);

	mfree(tmp);
	mfree(residue);
	mfree(xodd);
	mfree(yodd);
	mfree(step);

	return factor;
}
__forceinline void BVH8iIntersector8Hybrid<TriangleIntersector8>::intersect1(const BVH8i* bvh, NodeRef root, const size_t k, Ray8& ray,const avx3f &ray_org, const avx3f &ray_dir, const avx3f &ray_rdir, const avxf &ray_tnear, const avxf &ray_tfar, const avx3i& nearXYZ) { /*! stack state */ StackItemInt64 stack[stackSizeSingle]; //!< stack of nodes StackItemInt64* stackPtr = stack+1; //!< current stack pointer StackItemInt64* stackEnd = stack+stackSizeSingle; stack[0].ptr = root; stack[0].dist = neg_inf; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = nearXYZ.x[k]; const size_t nearY = nearXYZ.y[k]; const size_t nearZ = nearXYZ.z[k]; /*! load the ray into SIMD registers */ const avx3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const avx3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const avx3f org_rdir(org*rdir); avxf rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr(); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k])) continue; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = (Node*)cur.node(nodes); const size_t farX = nearX ^ sizeof(avxf), farY = nearY ^ sizeof(avxf), farZ = nearZ ^ sizeof(avxf); #if defined (__AVX2__) const avxf tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const avxf tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const avxf tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const avxf tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const avxf tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const avxf tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const avxf tNearX = (load8f((const char*)node+nearX) - org.x) * rdir.x; const avxf tNearY = (load8f((const char*)node+nearY) - org.y) * rdir.y; const avxf tNearZ = (load8f((const char*)node+nearZ) - org.z) * rdir.z; const avxf tFarX = (load8f((const char*)node+farX ) - org.x) * rdir.x; const avxf tFarY = (load8f((const char*)node+farY ) - org.y) * rdir.y; const avxf tFarZ = (load8f((const char*)node+farZ ) - org.z) * rdir.z; #endif #if defined(__AVX2__) const avxf tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const avxf tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const avxb vmask = cast(tNear) > cast(tFar); unsigned int mask = movemask(vmask)^0xff; #else const avxf tNear = max(tNearX,tNearY,tNearZ,rayNear); const avxf tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const avxb vmask = tNear <= tFar; unsigned int mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4i::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4i::emptyNode); assert(c1 != BVH4i::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c0 != BVH4i::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); c = node->child(r); d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4i::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } while(1) { r = __bscf(mask); c = node->child(r); d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! 
this is a leaf node */ STAT3(normal.trav_leaves,1,1,1); size_t num; Triangle* prim = (Triangle*) cur.leaf(accel,num); TriangleIntersector8::intersect(ray,k,prim,num,bvh->geometry); rayFar = ray.tfar[k]; } }
__forceinline bool BVH8iIntersector8Hybrid<TriangleIntersector8>::occluded1(const BVH8i* bvh, NodeRef root, const size_t k, Ray8& ray,const avx3f &ray_org, const avx3f &ray_dir, const avx3f &ray_rdir, const avxf &ray_tnear, const avxf &ray_tfar, const avx3i& nearXYZ) { /*! stack state */ NodeRef stack[stackSizeSingle]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSizeSingle; stack[0] = root; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = nearXYZ.x[k]; const size_t nearY = nearXYZ.y[k]; const size_t nearZ = nearXYZ.z[k]; /*! load the ray into SIMD registers */ const avx3f org (ray_org .x[k],ray_org .y[k],ray_org .z[k]); const avx3f rdir(ray_rdir.x[k],ray_rdir.y[k],ray_rdir.z[k]); const avx3f norg = -org, org_rdir(org*rdir); const avxf rayNear(ray_tnear[k]), rayFar(ray_tfar[k]); const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const Triangle * __restrict__ accel = (Triangle*)bvh->triPtr(); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = (Node*)cur.node(nodes); const size_t farX = nearX ^ sizeof(avxf), farY = nearY ^ sizeof(avxf), farZ = nearZ ^ sizeof(avxf); #if defined (__AVX2__) const avxf tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const avxf tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const avxf tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const avxf tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const avxf tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const avxf tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const avxf tNearX = (norg.x + load8f((const char*)node+nearX)) * rdir.x; const avxf tNearY = (norg.y + load8f((const char*)node+nearY)) * rdir.y; const avxf tNearZ = (norg.z + load8f((const char*)node+nearZ)) * rdir.z; const avxf tFarX = (norg.x + load8f((const char*)node+farX )) * rdir.x; const avxf tFarY = (norg.y + load8f((const char*)node+farY )) * rdir.y; const avxf tFarZ = (norg.z + load8f((const char*)node+farZ )) * rdir.z; #endif #if defined(__AVX2__) const avxf tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,rayNear)); const avxf tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,rayFar )); const avxb vmask = cast(tNear) > cast(tFar); unsigned int mask = movemask(vmask)^0xff; #else const avxf tNear = max(tNearX,tNearY,tNearZ,rayNear); const avxf tFar = min(tFarX ,tFarY ,tFarZ ,rayFar); const avxb vmask = tNear <= tFar; unsigned int mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4i::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4i::emptyNode); assert(c1 != BVH4i::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); assert(cur != BVH4i::emptyNode); if (likely(mask == 0)) continue; while(1) { r = __bscf(mask); NodeRef c = node->child(r); *stackPtr = c; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1]; stackPtr--; // assert(stackPtr < stackEnd); // *stackPtr = cur; stackPtr++; // /*! four children are hit */ // cur = node->child(3); // assert(cur != BVH4i::emptyNode); } /*! this is a leaf node */ STAT3(shadow.trav_leaves,1,1,1); size_t num; Triangle* prim = (Triangle*) cur.leaf(accel,num); if (TriangleIntersector8::occluded(ray,k,prim,num,bvh->geometry)) { ray.geomID[k] = 0; return true; } } return false; }
/*! Hybrid packet / single-ray traversal of a BVH4i for a packet of 16 rays.
 *
 *  The packet is traversed together as long as enough rays are still
 *  interested in the current node. When the number of such rays drops to
 *  BVH4i::hybridSIMDUtilSwitchThreshold or below, every remaining ray is
 *  traversed on its own through the subtree rooted at the current node.
 *
 *  \param valid_i  per-ray activity mask; a lane is active when its entry is non-zero
 *  \param bvh      hierarchy to traverse (root, node array and triangle array are read)
 *  \param ray16    ray packet; passed to the leaf intersectors, and its tfar
 *                  is re-read after every leaf intersection
 */
void BVH4iIntersector16Hybrid<LeafIntersector,ENABLE_COMPRESSED_BVH4I_NODES>::intersect(mic_i* valid_i, BVH4i* bvh, Ray16& ray16)
{
  /* near and node stack */
  __aligned(64) mic_f   stack_dist[3*BVH4i::maxDepth+1];
  __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1];
  __aligned(64) NodeRef stack_node_single[3*BVH4i::maxDepth+1];

  /* load ray; lanes not selected by valid0 get tnear = +inf and tfar = -inf */
  const mic_m valid0     = *(mic_i*)valid_i != mic_i(0);
  const mic3f rdir16     = rcp_safe(ray16.dir);
  const mic3f org_rdir16 = ray16.org * rdir16;
  mic_f ray_tnear        = select(valid0,ray16.tnear,pos_inf);
  mic_f ray_tfar         = select(valid0,ray16.tfar ,neg_inf);
  const mic_f inf        = mic_f(pos_inf);

  /* allocate stack and push root node; slot 0 holds a sentinel that
     terminates the traversal when it is popped */
  stack_node[0] = BVH4i::invalidNode;
  stack_dist[0] = inf;
  stack_node[1] = bvh->root;
  stack_dist[1] = ray_tnear;
  NodeRef* __restrict__ sptr_node = stack_node + 2;
  mic_f*   __restrict__ sptr_dist = stack_dist + 2;

  const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
  const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr();

  while (1) pop:
  {
    /* pop next node from stack */
    NodeRef curNode = *(sptr_node-1);
    mic_f curDist   = *(sptr_dist-1);
    sptr_node--;
    sptr_dist--;
    const mic_m m_stackDist = ray_tfar > curDist;

    /* stack empty ? */
    if (unlikely(curNode == BVH4i::invalidNode)) break;

    /* cull node if behind closest hit point */
    if (unlikely(none(m_stackDist))) continue;

    ///////////////////////////////////////////////////////////////////////////
    /* switch to single ray mode */
    if (unlikely(countbits(m_stackDist) <= BVH4i::hybridSIMDUtilSwitchThreshold))
    {
      /* the free part of the packet distance stack is reused as the
         per-ray distance stack */
      float *__restrict__ stack_dist_single = (float*)sptr_dist;
      store16f(stack_dist_single,inf);

      /* traverse single ray */
      long rayIndex = -1;
      while((rayIndex = bitscan64(rayIndex,m_stackDist)) != BITSCAN_NO_BIT_SET_64)
      {
        stack_node_single[0] = BVH4i::invalidNode;
        stack_node_single[1] = curNode;
        size_t sindex = 2;

        const mic_f org_xyz      = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z);
        const mic_f dir_xyz      = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z);
        const mic_f rdir_xyz     = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z);
        const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
        const mic_f min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]);
        mic_f       max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]);

        const unsigned int leaf_mask = BVH4I_LEAF_MASK;

        while (1)
        {
          /* this curNode intentionally shadows the packet-level curNode */
          NodeRef curNode = stack_node_single[sindex-1];
          sindex--;

          traverse_single_intersect<ENABLE_COMPRESSED_BVH4I_NODES>(curNode,
                                                                   sindex,
                                                                   rdir_xyz,
                                                                   org_rdir_xyz,
                                                                   min_dist_xyz,
                                                                   max_dist_xyz,
                                                                   stack_node_single,
                                                                   stack_dist_single,
                                                                   nodes,
                                                                   leaf_mask);

          /* return if stack is empty */
          if (unlikely(curNode == BVH4i::invalidNode)) break;

          /* intersect one ray against four triangles */
          const bool hit = LeafIntersector::intersect(curNode,
                                                      rayIndex,
                                                      dir_xyz,
                                                      org_xyz,
                                                      min_dist_xyz,
                                                      max_dist_xyz,
                                                      ray16,
                                                      accel,
                                                      (Scene*)bvh->geometry);

          if (hit) compactStack(stack_node_single,stack_dist_single,sindex,max_dist_xyz);
        }
      }

      /* pick up the far distances again after the single-ray traversals */
      ray_tfar = select(valid0,ray16.tfar ,neg_inf);
      continue;
    }
    ///////////////////////////////////////////////////////////////////////////

    const unsigned int leaf_mask = BVH4I_LEAF_MASK;

    const mic3f org = ray16.org;
    const mic3f dir = ray16.dir;

    while (1)
    {
      /* test if this is a leaf node */
      if (unlikely(curNode.isLeaf(leaf_mask))) break;

      STAT3(normal.trav_nodes,1,popcnt(ray_tfar > curDist),16);
      const Node* __restrict__ const node = curNode.node(nodes);

      prefetch<PFHINT_L1>((mic_f*)node + 0);
      prefetch<PFHINT_L1>((mic_f*)node + 1);

      /* pop of next node */
      sptr_node--;
      sptr_dist--;
      curNode = *sptr_node;
      curDist = *sptr_dist;

#pragma unroll(4)
      for (unsigned int i=0; i<4; i++)
      {
        BVH4i::NodeRef child;

        mic_f lclipMinX,lclipMinY,lclipMinZ;
        mic_f lclipMaxX,lclipMaxY,lclipMaxZ;

        if (!ENABLE_COMPRESSED_BVH4I_NODES)
        {
          child = node->lower[i].child;

          lclipMinX = msub(node->lower[i].x,rdir16.x,org_rdir16.x);
          lclipMinY = msub(node->lower[i].y,rdir16.y,org_rdir16.y);
          lclipMinZ = msub(node->lower[i].z,rdir16.z,org_rdir16.z);
          lclipMaxX = msub(node->upper[i].x,rdir16.x,org_rdir16.x);
          lclipMaxY = msub(node->upper[i].y,rdir16.y,org_rdir16.y);
          lclipMaxZ = msub(node->upper[i].z,rdir16.z,org_rdir16.z);
        }
        else
        {
          BVH4i::QuantizedNode* __restrict__ const compressed_node = (BVH4i::QuantizedNode*)node;
          child = compressed_node->child(i);

          const mic_f startXYZ = compressed_node->decompress_startXYZ();
          const mic_f diffXYZ  = compressed_node->decompress_diffXYZ();
          const mic_f clower   = compressed_node->decompress_lowerXYZ(startXYZ,diffXYZ);
          const mic_f cupper   = compressed_node->decompress_upperXYZ(startXYZ,diffXYZ);

          lclipMinX = msub(mic_f(clower[4*i+0]),rdir16.x,org_rdir16.x);
          lclipMinY = msub(mic_f(clower[4*i+1]),rdir16.y,org_rdir16.y);
          lclipMinZ = msub(mic_f(clower[4*i+2]),rdir16.z,org_rdir16.z);
          lclipMaxX = msub(mic_f(cupper[4*i+0]),rdir16.x,org_rdir16.x);
          lclipMaxY = msub(mic_f(cupper[4*i+1]),rdir16.y,org_rdir16.y);
          lclipMaxZ = msub(mic_f(cupper[4*i+2]),rdir16.z,org_rdir16.z);
        }

        /* only the third and fourth slot are checked for being unused */
        if (unlikely(i >=2 && child == BVH4i::invalidNode)) break;

        /* slab test of all 16 rays against the child box */
        const mic_f lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
        const mic_f lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
        const mic_m lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);
        const mic_f childDist = select(lhit,lnearP,inf);
        const mic_m m_child_dist = childDist < curDist;

        /* if we hit the child we choose to continue with that child if it
           is closer than the current next child, or we push it onto the stack */
        if (likely(any(lhit)))
        {
          sptr_node++;
          sptr_dist++;

          /* push cur node onto stack and continue with hit child */
          if (any(m_child_dist))
          {
            *(sptr_node-1) = curNode;
            *(sptr_dist-1) = curDist;
            curDist = childDist;
            curNode = child;
          }
          /* push hit child onto stack */
          else
          {
            *(sptr_node-1) = child;
            *(sptr_dist-1) = childDist;

            const char* __restrict__ const pnode = (char*)child.node(nodes);
            prefetch<PFHINT_L2>(pnode + 0);
            prefetch<PFHINT_L2>(pnode + 64);
          }
          assert(sptr_node - stack_node < BVH4i::maxDepth);
        }
      }

#if SWITCH_ON_DOWN_TRAVERSAL == 1
      /* too few rays left for the next node: push it back and let the pop
         code above switch to single ray mode */
      const mic_m curUtil = ray_tfar > curDist;
      if (unlikely(countbits(curUtil) <= BVH4i::hybridSIMDUtilSwitchThreshold))
      {
        *sptr_node++ = curNode;
        *sptr_dist++ = curDist;
        goto pop;
      }
#endif
    }

    /* return if stack is empty */
    if (unlikely(curNode == BVH4i::invalidNode)) break;

    /* intersect leaf */
    const mic_m m_valid_leaf = ray_tfar > curDist;
    STAT3(normal.trav_leaves,1,popcnt(m_valid_leaf),16);
    LeafIntersector::intersect16(curNode,m_valid_leaf,dir,org,ray16,accel,(Scene*)bvh->geometry);
    ray_tfar = select(m_valid_leaf,ray16.tfar,ray_tfar);
  }
}
void BVH8Intersector1<robust,PrimitiveIntersector>::occluded(const BVH8* bvh, Ray& ray) { /*! perform per ray precalculations required by the primitive intersector */ Precalculations pre(ray,bvh); /*! stack state */ NodeRef stack[stackSize]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSize; stack[0] = bvh->root; /* filter out invalid rays */ #if defined(RTCORE_IGNORE_INVALID_RAYS) if (!ray.valid()) return; #endif /* verify correct input */ assert(ray.tnear > -FLT_MIN); //assert(!(types & BVH4::FLAG_NODE_MB) || (ray.time >= 0.0f && ray.time <= 1.0f)); /*! load the ray into SIMD registers */ const Vec3f8 norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const Vec3f8 rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const Vec3f8 org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const float8 ray_near(ray.tnear); float8 ray_far(ray.tfar); /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_rdir.x >= 0 ? 0*sizeof(float8) : 1*sizeof(float8); const size_t nearY = ray_rdir.y >= 0 ? 2*sizeof(float8) : 3*sizeof(float8); const size_t nearZ = ray_rdir.z >= 0 ? 4*sizeof(float8) : 5*sizeof(float8); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ sizeof(float8), farY = nearY ^ sizeof(float8), farZ = nearZ ^ sizeof(float8); #if defined (__AVX2__) const float8 tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const float8 tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const float8 tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const float8 tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const float8 tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const float8 tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const float8 tNearX = (norg.x + load8f((const char*)node+nearX)) * rdir.x; const float8 tNearY = (norg.y + load8f((const char*)node+nearY)) * rdir.y; const float8 tNearZ = (norg.z + load8f((const char*)node+nearZ)) * rdir.z; const float8 tFarX = (norg.x + load8f((const char*)node+farX )) * rdir.x; const float8 tFarY = (norg.y + load8f((const char*)node+farY )) * rdir.y; const float8 tFarZ = (norg.z + load8f((const char*)node+farZ )) * rdir.z; #endif #if defined(__AVX2__) const float8 tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const float8 tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const bool8 vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xff; #else const float8 tNear = max(tNearX,tNearY,tNearZ,ray_near); const float8 tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const bool8 vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); cur.prefetch(); assert(cur != BVH8::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); c0.prefetch(); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); c1.prefetch(); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH8::emptyNode); assert(c1 != BVH8::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); cur.prefetch(); *stackPtr = cur; stackPtr++; if (likely(mask == 0)) { stackPtr--; continue; } /*! process more than three children */ while(1) { r = __bscf(mask); NodeRef c = node->child(r); c.prefetch(); *stackPtr = c; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1]; stackPtr--; } /*! this is a leaf node */ assert(cur != BVH8::emptyNode); STAT3(shadow.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); size_t lazy_node = 0; if (PrimitiveIntersector::occluded(pre,ray,prim,num,bvh->scene,lazy_node)) { ray.geomID = 0; break; } if (unlikely(lazy_node)) { *stackPtr = (NodeRef)lazy_node; stackPtr++; } } AVX_ZERO_UPPER(); }
/*
 * Greatest common divisor of two multi-precision integers, computed with
 * the binary (shift and subtract) method.
 *
 * The arguments are not modified; all work is done on copies made with
 * mcopy(). The result is a number with sign +1 obtained from mcopy()/mmul()
 * (presumably newly allocated and owned by the caller -- confirm against
 * those helpers).
 *
 * If either argument is zero, the result is the absolute value of the other.
 */
unsigned int *
mgcd(unsigned int *u, unsigned int *v)
{
	unsigned int *t;
	int shift, words, j;

	/* gcd(0, v) = |v| */
	if (MZERO(u)) {
		t = mcopy(v);
		MSIGN(t) = 1;
		return t;
	}

	/* gcd(u, 0) = |u| */
	if (MZERO(v)) {
		t = mcopy(u);
		MSIGN(t) = 1;
		return t;
	}

	/* work on non-negative private copies */
	u = mcopy(u);
	v = mcopy(v);
	MSIGN(u) = 1;
	MSIGN(v) = 1;

	/* strip the power of two shared by both operands */
	for (shift = 0; ((u[0] | v[0]) & 1) == 0; shift++) {
		mshiftright(u);
		mshiftright(v);
	}

	/* t starts as -v when u is odd, as u otherwise */
	if ((u[0] & 1) == 0)
		t = mcopy(u);
	else {
		t = mcopy(v);
		MSIGN(t) = -MSIGN(t);
	}

	for (;;) {

		/* remaining factors of two cannot be part of the odd gcd */
		while (!(t[0] & 1))
			mshiftright(t);

		/* a positive t replaces u, a negative t replaces v (as |t|) */
		if (MSIGN(t) != 1) {
			mfree(v);
			v = mcopy(t);
			MSIGN(v) = -MSIGN(v);
		} else {
			mfree(u);
			u = mcopy(t);
		}

		mfree(t);

		t = msub(u, v);

		if (!MZERO(t))
			continue;

		/* u == v: the answer is u * 2^shift */
		mfree(t);
		mfree(v);

		/* build 2^shift; sized for 32 bits per word */
		words = (shift / 32) + 1;
		v = mnew(words);
		MSIGN(v) = 1;
		MLENGTH(v) = words;
		for (j = 0; j < words; j++)
			v[j] = 0;
		mp_set_bit(v, shift);

		t = mmul(u, v);
		mfree(u);
		mfree(v);
		return t;
	}
}
NOMIPS16 int main() { v2sf a, b, c, d, e, f; float f1, f2; f1 = 1.2; f2 = 3.4; a = init (f1, f2); b = (v2sf) {1.2, 3.4}; if (!__builtin_mips_upper_c_eq_ps (a, b) || !__builtin_mips_lower_c_eq_ps (a, b)) abort (); a = (v2sf) {1.2, 2.3}; b = (v2sf) {5.3, 6.1}; b = move (a); if (!__builtin_mips_upper_c_eq_ps (a, b) || !__builtin_mips_lower_c_eq_ps (a, b)) abort (); a = (v2sf) {1.2, 2.3}; b = (v2sf) {5.3, 6.1}; c = add (a, b); d = (v2sf) {6.5, 8.4}; if (!__builtin_mips_upper_c_eq_ps (c, d) || !__builtin_mips_lower_c_eq_ps (c, d)) abort (); a = (v2sf) {1, 12}; b = (v2sf) {5, 6}; c = sub (a, b); d = (v2sf) {-4, 6}; if (!__builtin_mips_upper_c_eq_ps (c, d) || !__builtin_mips_lower_c_eq_ps (c, d)) abort (); a = (v2sf) {1, 12}; b = (v2sf) {5, 6}; c = mul (a, b); d = (v2sf) {5, 72}; if (!__builtin_mips_upper_c_eq_ps (c, d) || !__builtin_mips_lower_c_eq_ps (c, d)) abort (); a = (v2sf) {1, 12}; b = (v2sf) {5, 6}; c = (v2sf) {5, 6}; d = madd (a, b, c); e = (v2sf) {10, 78}; if (!__builtin_mips_upper_c_eq_ps (d, e) || !__builtin_mips_lower_c_eq_ps (d, e)) abort (); a = (v2sf) {1, 12}; b = (v2sf) {5, 6}; c = (v2sf) {5, 6}; d = msub (a, b, c); e = (v2sf) {0, 66}; if (!__builtin_mips_upper_c_eq_ps (d, e) || !__builtin_mips_lower_c_eq_ps (d, e)) abort (); a = (v2sf) {1, 12}; b = (v2sf) {5, 6}; c = (v2sf) {5, 6}; d = nmadd (a, b, c); e = (v2sf) {-10, -78}; if (!__builtin_mips_upper_c_eq_ps (d, e) || !__builtin_mips_lower_c_eq_ps (d, e)) abort (); a = (v2sf) {1, 12}; b = (v2sf) {5, 6}; c = (v2sf) {5, 6}; d = nmsub (a, b, c); e = (v2sf) {0, -66}; if (!__builtin_mips_upper_c_eq_ps (d, e) || !__builtin_mips_lower_c_eq_ps (d, e)) abort (); a = (v2sf) {98, 12}; b = neg (a); c = (v2sf) {-98, -12}; if (!__builtin_mips_upper_c_eq_ps (b, c) || !__builtin_mips_lower_c_eq_ps (b, c)) abort (); a = (v2sf) {1, 12}; b = (v2sf) {5, 6}; c = cond_move1 (a, b, 1000); if (!__builtin_mips_upper_c_eq_ps (c, a) || !__builtin_mips_lower_c_eq_ps (c, a)) abort (); a = (v2sf) {1, 12}; b = (v2sf) {5, 6}; c = 
cond_move2 (a, b, -1000); if (!__builtin_mips_upper_c_eq_ps (c, b) || !__builtin_mips_lower_c_eq_ps (c, b)) abort (); a = (v2sf) {1, 12}; b = (v2sf) {5, 6}; c = cond_move3 (a, b, 9.0); if (!__builtin_mips_upper_c_eq_ps (c, a) || !__builtin_mips_lower_c_eq_ps (c, a)) abort (); a = (v2sf) {1, 12}; b = (v2sf) {5, 6}; c = cond_move4 (a, b, -10.0); if (!__builtin_mips_upper_c_eq_ps (c, b) || !__builtin_mips_lower_c_eq_ps (c, b)) abort (); a = (v2sf) {5, 12}; b = (v2sf) {5, 6}; c = (v2sf) {33, 123}; d = (v2sf) {8, 78}; e = __builtin_mips_movt_c_eq_ps (a, b, c, d); f = (v2sf) {8, 123}; if (!__builtin_mips_upper_c_eq_ps (e, f) || !__builtin_mips_lower_c_eq_ps (e, f)) abort (); a = (v2sf) {5, 12}; b = (v2sf) {5, 6}; c = (v2sf) {33, 123}; d = (v2sf) {8, 78}; e = __builtin_mips_movf_c_eq_ps (a, b, c, d); f = (v2sf) {33, 78}; if (!__builtin_mips_upper_c_eq_ps (e, f) || !__builtin_mips_lower_c_eq_ps (e, f)) abort (); a = load(); b = (v2sf) {100, 200}; if (!__builtin_mips_upper_c_eq_ps (a, b) || !__builtin_mips_lower_c_eq_ps (a, b)) abort (); a = (v2sf) {123, 321}; store (a); b = load(); if (!__builtin_mips_upper_c_eq_ps (a, b) || !__builtin_mips_lower_c_eq_ps (a, b)) abort (); printf ("Test Passes\n"); exit (0); }
void BVH8Intersector1<robust,PrimitiveIntersector>::intersect(const BVH8* bvh, Ray& ray) { /*! perform per ray precalculations required by the primitive intersector */ Precalculations pre(ray,bvh); /*! stack state */ StackItemT<NodeRef> stack[stackSize]; //!< stack of nodes StackItemT<NodeRef>* stackPtr = stack+1; //!< current stack pointer StackItemT<NodeRef>* stackEnd = stack+stackSize; stack[0].ptr = bvh->root; stack[0].dist = neg_inf; /* filter out invalid rays */ #if defined(RTCORE_IGNORE_INVALID_RAYS) if (!ray.valid()) return; #endif /* verify correct input */ assert(ray.tnear > -FLT_MIN); //assert(!(types & BVH4::FLAG_NODE_MB) || (ray.time >= 0.0f && ray.time <= 1.0f)); /*! load the ray into SIMD registers */ const Vec3f8 norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const Vec3f8 rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const Vec3f8 org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const float8 ray_near(ray.tnear); float8 ray_far(ray.tfar); /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray_rdir.x >= 0.0f ? 0*sizeof(float8) : 1*sizeof(float8); const size_t nearY = ray_rdir.y >= 0.0f ? 2*sizeof(float8) : 3*sizeof(float8); const size_t nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(float8) : 5*sizeof(float8); /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(*(float*)&stackPtr->dist > ray.tfar)) continue; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ sizeof(float8), farY = nearY ^ sizeof(float8), farZ = nearZ ^ sizeof(float8); #if defined (__AVX2__) const float8 tNearX = msub(load8f((const char*)node+nearX), rdir.x, org_rdir.x); const float8 tNearY = msub(load8f((const char*)node+nearY), rdir.y, org_rdir.y); const float8 tNearZ = msub(load8f((const char*)node+nearZ), rdir.z, org_rdir.z); const float8 tFarX = msub(load8f((const char*)node+farX ), rdir.x, org_rdir.x); const float8 tFarY = msub(load8f((const char*)node+farY ), rdir.y, org_rdir.y); const float8 tFarZ = msub(load8f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const float8 tNearX = (norg.x + load8f((const char*)node+nearX)) * rdir.x; const float8 tNearY = (norg.y + load8f((const char*)node+nearY)) * rdir.y; const float8 tNearZ = (norg.z + load8f((const char*)node+nearZ)) * rdir.z; const float8 tFarX = (norg.x + load8f((const char*)node+farX )) * rdir.x; const float8 tFarY = (norg.y + load8f((const char*)node+farY )) * rdir.y; const float8 tFarZ = (norg.z + load8f((const char*)node+farZ )) * rdir.z; #endif const float round_down = 1.0f-2.0f*float(ulp); const float round_up = 1.0f+2.0f*float(ulp); #if defined(__AVX2__) const float8 tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const float8 tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const bool8 vmask = robust ? (round_down*tNear > round_up*tFar) : cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xff; #else const float8 tNear = max(tNearX,tNearY,tNearZ,ray_near); const float8 tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const bool8 vmask = robust ? (round_down*tNear > round_up*tFar) : tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! 
one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); cur.prefetch(); assert(cur != BVH8::emptyNode); continue; } /*! two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); c0.prefetch(); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); c1.prefetch(); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH8::emptyNode); assert(c1 != BVH8::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); c.prefetch(); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH8::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ r = __bscf(mask); c = node->child(r); c.prefetch(); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! 
fallback case if more than 4 children are hit */ while (1) { r = __bscf(mask); assert(stackPtr < stackEnd); c = node->child(r); c.prefetch(); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; if (unlikely(mask == 0)) break; } cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! this is a leaf node */ assert(cur != BVH8::emptyNode); STAT3(normal.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); size_t lazy_node = 0; PrimitiveIntersector::intersect(pre,ray,prim,num,bvh->scene,lazy_node); ray_far = ray.tfar; if (unlikely(lazy_node)) { stackPtr->ptr = lazy_node; stackPtr->dist = inf; stackPtr++; } } AVX_ZERO_UPPER(); }
void BVH4Intersector1<PrimitiveIntersector>::intersect(const BVH4* bvh, Ray& ray) { /*! stack state */ StackItemInt32<NodeRef> stack[stackSize]; //!< stack of nodes StackItemInt32<NodeRef>* stackPtr = stack+1; //!< current stack pointer StackItemInt32<NodeRef>* stackEnd = stack+stackSize; stack[0].ptr = bvh->root; stack[0].dist = neg_inf; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray.dir.x >= 0.0f ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray.dir.y >= 0.0f ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray.dir.z >= 0.0f ? 4*sizeof(ssef) : 5*sizeof(ssef); #if 0 // FIXME: why is this slower /*! load the ray */ Vec3fa ray_org = ray.org; Vec3fa ray_dir = ray.dir; ssef ray_near = max(ray.tnear,FLT_MIN); // we do not support negative tnear values in this kernel due to integer optimizations ssef ray_far = ray.tfar; #if defined(__FIX_RAYS__) const float float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,Vec3fa(-float_range),Vec3fa(+float_range)); ray_dir = clamp(ray_dir,Vec3fa(-float_range),Vec3fa(+float_range)); ray_far = min(ray_far,float(inf)); #endif const Vec3fa ray_rdir = rcp_safe(ray_dir); const sse3f org(ray_org), dir(ray_dir); const sse3f norg(-ray_org), rdir(ray_rdir), org_rdir(ray_org*ray_rdir); #else /*! load the ray into SIMD registers */ const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const ssef ray_near(ray.tnear); ssef ray_far(ray.tfar); #endif /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = NodeRef(stackPtr->ptr); /*! if popped node is too far, pop next one */ if (unlikely(*(float*)&stackPtr->dist > ray.tfar)) continue; /* downtraversal loop */ while (true) { /*! 
stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(normal.trav_nodes,1,1,1); /*! single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,ray_near); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; continue; } else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; continue; } } /*! Here starts the slow path for 3 or 4 hit children. We push * all nodes onto the stack to sort them there. */ assert(stackPtr < stackEnd); stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; assert(stackPtr < stackEnd); stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; /*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); NodeRef c = node->child(r); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); if (likely(mask == 0)) { sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; continue; } /*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */ assert(stackPtr < stackEnd); r = __bscf(mask); c = node->child(r); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++; assert(c != BVH4::emptyNode); sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; } /*! this is a leaf node */ STAT3(normal.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); PrimitiveIntersector::intersect(ray,prim,num,bvh->geometry); ray_far = ray.tfar; } }
/*! Occlusion query of a single ray against a BVH4i.
 *
 *  \param This  intersector instance providing the hierarchy to traverse
 *  \param ray   ray to test; only read by this function itself
 *  \return true as soon as the triangle intersector reports an occluder in
 *          some leaf, false when the whole traversal finds none
 */
bool BVH4iIntersector1<TriangleIntersector>::occluded(const BVH4iIntersector1* This, Ray& ray)
{
  AVX_ZERO_UPPER();
  STAT3(shadow.travs,1,1,1);

  /*! traversal stack of nodes that still have to be visited, seeded with the root */
  const BVH4i* bvh = This->bvh;
  NodeRef stack[1+3*BVH4i::maxDepth];
  NodeRef* top = stack+1;
  stack[0] = bvh->root;

  /*! byte offsets into a node of the bound planes the ray enters through
   *  (near) and leaves through (far), chosen per axis by the sign of the
   *  ray direction */
  const size_t nearX = (ray.dir.x >= 0 ? 0 : 1) * sizeof(ssef_m);
  const size_t nearY = (ray.dir.y >= 0 ? 2 : 3) * sizeof(ssef_m);
  const size_t nearZ = (ray.dir.z >= 0 ? 4 : 5) * sizeof(ssef_m);
  const size_t farX  = nearX ^ 16;
  const size_t farY  = nearY ^ 16;
  const size_t farZ  = nearZ ^ 16;

  /*! broadcast the ray into SIMD registers */
  const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
  const Vector3f ray_rdir = rcp_safe(ray.dir);
  const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z);
  const Vector3f ray_org_rdir = ray.org*ray_rdir;
  const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
  const ssef rayNear(ray.tnear);
  const ssef rayFar(ray.tfar);

  const void* nodePtr = bvh->nodePtr();
  const void* triPtr  = bvh->triPtr();

  /* pop loop */
  while (true) pop:
  {
    /*! take the next node off the stack; done when the stack is empty */
    if (unlikely(top == stack)) break;
    NodeRef cur = (NodeRef) *--top;

    /* descend until a leaf is reached */
    while (likely(!cur.isLeaf()))
    {
      STAT3(shadow.trav_nodes,1,1,1);

      /*! intersect the ray with the 4 child boxes of the node; the bound
       *  planes are read at nodePtr + cur + offset */
      const Node* node = cur.node(nodePtr);
      const char* const base = (const char*)nodePtr+(size_t)cur;

#if defined (__AVX2__)
      const ssef tNearX = msub(ssef(base+nearX), rdir.x, org_rdir.x);
      const ssef tNearY = msub(ssef(base+nearY), rdir.y, org_rdir.y);
      const ssef tNearZ = msub(ssef(base+nearZ), rdir.z, org_rdir.z);
      const ssef tFarX  = msub(ssef(base+farX ), rdir.x, org_rdir.x);
      const ssef tFarY  = msub(ssef(base+farY ), rdir.y, org_rdir.y);
      const ssef tFarZ  = msub(ssef(base+farZ ), rdir.z, org_rdir.z);
#else
      const ssef tNearX = (norg.x + ssef(base+nearX)) * rdir.x;
      const ssef tNearY = (norg.y + ssef(base+nearY)) * rdir.y;
      const ssef tNearZ = (norg.z + ssef(base+nearZ)) * rdir.z;
      const ssef tFarX  = (norg.x + ssef(base+farX )) * rdir.x;
      const ssef tFarY  = (norg.y + ssef(base+farY )) * rdir.y;
      const ssef tFarZ  = (norg.z + ssef(base+farZ )) * rdir.z;
#endif

      /* one bit per child whose box is hit */
      const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear);
      const ssef tFar  = min(tFarX ,tFarY ,tFarZ ,rayFar);
      size_t hits = movemask(tNear <= tFar);

      /*! nothing hit: continue with the next stacked node */
      if (unlikely(hits == 0))
        goto pop;

      /*! exactly one child hit: descend into it */
      size_t slot = __bsf(hits);
      hits = __btc(hits,slot);
      if (likely(hits == 0)) {
        cur = node->child(slot);
        continue;
      }

      /*! two children hit: stack the farther one, descend into the closer one */
      NodeRef childA = node->child(slot);
      const float distA = tNear[slot];
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      NodeRef childB = node->child(slot);
      const float distB = tNear[slot];
      if (likely(hits == 0)) {
        if (distA < distB) { *top++ = childB; cur = childA; }
        else               { *top++ = childA; cur = childB; }
        continue;
      }

      /* more than two children hit: stack the first two unsorted */
      *top++ = childA;
      *top++ = childB;

      /*! three children hit: descend into the third one. Its reference is
       *  also written to the stack slot, which is only claimed when the
       *  fourth child is hit too. */
      slot = __bsf(hits);
      hits = __btc(hits,slot);
      cur = node->child(slot);
      *top = cur;
      if (likely(hits == 0))
        continue;
      top++;

      /*! all four children hit: the only one left is child 3 */
      cur = node->child(3);
    }

    /*! this is a leaf node: test every triangle until one occludes the ray */
    STAT3(shadow.trav_leaves,1,1,1);
    size_t num;
    Triangle* tri = (Triangle*) cur.leaf(triPtr,num);
    for (size_t i=0; i<num; i++) {
      if (!TriangleIntersector::occluded(ray,tri[i],bvh->vertices))
        continue;
      AVX_ZERO_UPPER();
      return true;
    }
  }
  AVX_ZERO_UPPER();
  return false;
}
/*! Hybrid packet / single-ray traversal of a BVH4mb for a packet of 16 rays.
 *
 *  \param valid_i  per-lane activity mask; lanes whose entry is non-zero are
 *                  traced (valid0 = *valid_i != 0)
 *  \param bvh      hierarchy to traverse; root, nodePtr(), triPtr() and
 *                  geometry are read
 *  \param ray16    the ray packet; org, dir, tnear, tfar and time are read
 *                  here and the packet is handed to LeafIntersector
 *
 *  Outline of what the body does:
 *   - ray_tnear / ray_tfar are built with select(valid0, ..., pos_inf/neg_inf),
 *     presumably so that inactive lanes can never pass the slab test
 *     (assumes select(m,a,b) takes a where m is set -- TODO confirm).
 *   - Stack slot 0 holds BVH4i::invalidNode as an end marker, slot 1 the root.
 *   - Pop loop: the top entry is popped; the loop ends when the end marker is
 *     popped, and an entry is skipped when no lane has ray_tfar > its distance.
 *   - If the number of lanes with ray_tfar > curDist is at most
 *     BVH4i::hybridSIMDUtilSwitchThreshold, every such lane is traced on its
 *     own (traverse_single_intersect + LeafIntersector::intersect, with
 *     compactStack called after a hit) and ray_tfar is then reloaded from
 *     ray16.tfar.
 *   - Otherwise the packet descends: for each of the four children the
 *     bounds are blended as (1-time)*bound + time*bound_t1 and slab-tested
 *     via msub(bound, rdir, org*rdir). A hit child either becomes the new
 *     current node (the old one is pushed) when any lane sees it closer
 *     than curDist, or is pushed itself.
 *   - With SWITCH_ON_DOWN_TRAVERSAL == 1 the current node is pushed back and
 *     control returns to the pop label once lane utilisation drops to the
 *     threshold during descent.
 *   - Leaves go to LeafIntersector::intersect16 for the lanes with
 *     ray_tfar > curDist; ray_tfar is refreshed from ray16.tfar for them.
 *
 *  NOTE(review): as it appears here the function body is not closed after
 *  the pop loop (one closing brace short), and the preprocessor directives
 *  and '//' banner comments share physical lines with code -- confirm
 *  against the upstream layout before building this text. */
void BVH4mbIntersector16Hybrid<LeafIntersector>::intersect(int16* valid_i, BVH4mb* bvh, Ray16& ray16) { /* near and node stack */ __aligned(64) float16 stack_dist[3*BVH4i::maxDepth+1]; __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1]; __aligned(64) NodeRef stack_node_single[3*BVH4i::maxDepth+1]; /* load ray */ const bool16 valid0 = *(int16*)valid_i != int16(0); const Vec3f16 rdir16 = rcp_safe(ray16.dir); const Vec3f16 org_rdir16 = ray16.org * rdir16; float16 ray_tnear = select(valid0,ray16.tnear,pos_inf); float16 ray_tfar = select(valid0,ray16.tfar ,neg_inf); const float16 inf = float16(pos_inf); /* allocate stack and push root node */ stack_node[0] = BVH4i::invalidNode; stack_dist[0] = inf; stack_node[1] = bvh->root; stack_dist[1] = ray_tnear; NodeRef* __restrict__ sptr_node = stack_node + 2; float16* __restrict__ sptr_dist = stack_dist + 2; const Node * __restrict__ nodes = (Node *)bvh->nodePtr(); const BVH4mb::Triangle01 * __restrict__ accel = (BVH4mb::Triangle01 *)bvh->triPtr(); while (1) pop: { /* pop next node from stack */ NodeRef curNode = *(sptr_node-1); float16 curDist = *(sptr_dist-1); sptr_node--; sptr_dist--; const bool16 m_stackDist = ray_tfar > curDist; /* stack empty ? 
*/ if (unlikely(curNode == BVH4i::invalidNode)) break; /* cull node if behind closest hit point */ if (unlikely(none(m_stackDist))) continue; /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /* switch to single ray mode */ if (unlikely(countbits(m_stackDist) <= BVH4i::hybridSIMDUtilSwitchThreshold)) { float *__restrict__ stack_dist_single = (float*)sptr_dist; store16f(stack_dist_single,inf); /* traverse single ray */ long rayIndex = -1; while((rayIndex = bitscan64(rayIndex,m_stackDist)) != BITSCAN_NO_BIT_SET_64) { stack_node_single[0] = BVH4i::invalidNode; stack_node_single[1] = curNode; size_t sindex = 2; const float16 org_xyz = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z); const float16 dir_xyz = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z); const float16 rdir_xyz = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z); const float16 org_rdir_xyz = org_xyz * rdir_xyz; const float16 min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]); float16 max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]); const float16 time = broadcast1to16f(&ray16.time[rayIndex]); const unsigned int leaf_mask = BVH4I_LEAF_MASK; while (1) { NodeRef curNode = stack_node_single[sindex-1]; sindex--; traverse_single_intersect(curNode, sindex, rdir_xyz, org_rdir_xyz, min_dist_xyz, max_dist_xyz, time, stack_node_single, stack_dist_single, nodes, leaf_mask); /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* intersect one ray against four triangles */ const bool hit = LeafIntersector::intersect(curNode, rayIndex, dir_xyz, org_xyz, min_dist_xyz, max_dist_xyz, ray16, accel, (Scene*)bvh->geometry); if (hit) 
compactStack(stack_node_single,stack_dist_single,sindex,max_dist_xyz); } } ray_tfar = select(valid0,ray16.tfar ,neg_inf); continue; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////// const unsigned int leaf_mask = BVH4I_LEAF_MASK; const float16 time = ray16.time; const float16 one_time = (float16::one() - time); while (1) { /* test if this is a leaf node */ if (unlikely(curNode.isLeaf(leaf_mask))) break; STAT3(normal.trav_nodes,1,popcnt(ray_tfar > curDist),16); const Node* __restrict__ const node = curNode.node(nodes); const BVH4mb::Node* __restrict__ const nodeMB = (BVH4mb::Node*)node; /* pop of next node */ sptr_node--; sptr_dist--; curNode = *sptr_node; curDist = *sptr_dist; prefetch<PFHINT_L1>((char*)node + 0*64); prefetch<PFHINT_L1>((char*)node + 1*64); prefetch<PFHINT_L1>((char*)node + 2*64); prefetch<PFHINT_L1>((char*)node + 3*64); #pragma unroll(4) for (unsigned int i=0; i<4; i++) { const NodeRef child = node->lower[i].child; const float16 lower_x = one_time * nodeMB->lower[i].x + time * nodeMB->lower_t1[i].x; const float16 lower_y = one_time * nodeMB->lower[i].y + time * nodeMB->lower_t1[i].y; const float16 lower_z = one_time * nodeMB->lower[i].z + time * nodeMB->lower_t1[i].z; const float16 upper_x = one_time * nodeMB->upper[i].x + time * nodeMB->upper_t1[i].x; const float16 upper_y = one_time * nodeMB->upper[i].y + time * nodeMB->upper_t1[i].y; const float16 upper_z = one_time * nodeMB->upper[i].z + time * nodeMB->upper_t1[i].z; if (unlikely(i >=2 && child == BVH4i::invalidNode)) break; const float16 lclipMinX = msub(lower_x,rdir16.x,org_rdir16.x); const float16 lclipMinY = msub(lower_y,rdir16.y,org_rdir16.y); const float16 lclipMinZ = msub(lower_z,rdir16.z,org_rdir16.z); 
const float16 lclipMaxX = msub(upper_x,rdir16.x,org_rdir16.x); const float16 lclipMaxY = msub(upper_y,rdir16.y,org_rdir16.y); const float16 lclipMaxZ = msub(upper_z,rdir16.z,org_rdir16.z); const float16 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ)); const float16 lfarP = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ)); const bool16 lhit = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar); const float16 childDist = select(lhit,lnearP,inf); const bool16 m_child_dist = childDist < curDist; /* if we hit the child we choose to continue with that child if it is closer than the current next child, or we push it onto the stack */ if (likely(any(lhit))) { sptr_node++; sptr_dist++; /* push cur node onto stack and continue with hit child */ if (any(m_child_dist)) { *(sptr_node-1) = curNode; *(sptr_dist-1) = curDist; curDist = childDist; curNode = child; } /* push hit child onto stack*/ else { *(sptr_node-1) = child; *(sptr_dist-1) = childDist; } assert(sptr_node - stack_node < BVH4i::maxDepth); } } #if SWITCH_ON_DOWN_TRAVERSAL == 1 const bool16 curUtil = ray_tfar > curDist; if (unlikely(countbits(curUtil) <= BVH4i::hybridSIMDUtilSwitchThreshold)) { *sptr_node++ = curNode; *sptr_dist++ = curDist; goto pop; } #endif } /* return if stack is empty */ if (unlikely(curNode == BVH4i::invalidNode)) break; /* intersect leaf */ const bool16 m_valid_leaf = ray_tfar > curDist; STAT3(normal.trav_leaves,1,popcnt(m_valid_leaf),16); LeafIntersector::intersect16(curNode, m_valid_leaf, ray16.dir, ray16.org, ray16, accel, (Scene*)bvh->geometry); ray_tfar = select(m_valid_leaf,ray16.tfar,ray_tfar); }
void BVH4Intersector1<PrimitiveIntersector>::occluded(const BVH4* bvh, Ray& ray) { /*! stack state */ NodeRef stack[stackSize]; //!< stack of nodes that still need to get traversed NodeRef* stackPtr = stack+1; //!< current stack pointer NodeRef* stackEnd = stack+stackSize; stack[0] = bvh->root; /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray.dir.x >= 0 ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray.dir.y >= 0 ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray.dir.z >= 0 ? 4*sizeof(ssef) : 5*sizeof(ssef); #if 0 // FIXME: why is this slower /*! load the ray */ Vec3fa ray_org = ray.org; Vec3fa ray_dir = ray.dir; ssef ray_near = max(ray.tnear,FLT_MIN); // we do not support negative tnear values in this kernel due to integer optimizations ssef ray_far = ray.tfar; #if defined(__FIX_RAYS__) const float float_range = 0.1f*FLT_MAX; ray_org = clamp(ray_org,Vec3fa(-float_range),Vec3fa(+float_range)); ray_dir = clamp(ray_dir,Vec3fa(-float_range),Vec3fa(+float_range)); ray_far = min(ray_far,float(inf)); #endif const Vec3fa ray_rdir = rcp_safe(ray_dir); const sse3f org(ray_org), dir(ray_dir); const sse3f norg(-ray_org), rdir(ray_rdir), org_rdir(ray_org*ray_rdir); #else /*! load the ray into SIMD registers */ const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z); const Vec3fa ray_rdir = rcp_safe(ray.dir); const sse3f rdir(ray_rdir.x,ray_rdir.y,ray_rdir.z); const Vec3fa ray_org_rdir = ray.org*ray_rdir; const sse3f org_rdir(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); const ssef ray_near(ray.tnear); ssef ray_far(ray.tfar); #endif /* pop loop */ while (true) pop: { /*! pop next node */ if (unlikely(stackPtr == stack)) break; stackPtr--; NodeRef cur = (NodeRef) *stackPtr; /* downtraversal loop */ while (true) { /*! stop if we found a leaf */ if (unlikely(cur.isLeaf())) break; STAT3(shadow.trav_nodes,1,1,1); /*! 
single ray intersection with 4 boxes */ const Node* node = cur.node(); const size_t farX = nearX ^ 16, farY = nearY ^ 16, farZ = nearZ ^ 16; #if defined (__AVX2__) const ssef tNearX = msub(load4f((const char*)node+nearX), rdir.x, org_rdir.x); const ssef tNearY = msub(load4f((const char*)node+nearY), rdir.y, org_rdir.y); const ssef tNearZ = msub(load4f((const char*)node+nearZ), rdir.z, org_rdir.z); const ssef tFarX = msub(load4f((const char*)node+farX ), rdir.x, org_rdir.x); const ssef tFarY = msub(load4f((const char*)node+farY ), rdir.y, org_rdir.y); const ssef tFarZ = msub(load4f((const char*)node+farZ ), rdir.z, org_rdir.z); #else const ssef tNearX = (norg.x + load4f((const char*)node+nearX)) * rdir.x; const ssef tNearY = (norg.y + load4f((const char*)node+nearY)) * rdir.y; const ssef tNearZ = (norg.z + load4f((const char*)node+nearZ)) * rdir.z; const ssef tFarX = (norg.x + load4f((const char*)node+farX )) * rdir.x; const ssef tFarY = (norg.y + load4f((const char*)node+farY )) * rdir.y; const ssef tFarZ = (norg.z + load4f((const char*)node+farZ )) * rdir.z; #endif #if defined(__SSE4_1__) const ssef tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray_near)); const ssef tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray_far )); const sseb vmask = cast(tNear) > cast(tFar); size_t mask = movemask(vmask)^0xf; #else const ssef tNear = max(tNearX,tNearY,tNearZ,ray_near); const ssef tFar = min(tFarX ,tFarY ,tFarZ ,ray_far); const sseb vmask = tNear <= tFar; size_t mask = movemask(vmask); #endif /*! if no child is hit, pop next node */ if (unlikely(mask == 0)) goto pop; /*! one child is hit, continue with that child */ size_t r = __bscf(mask); if (likely(mask == 0)) { cur = node->child(r); assert(cur != BVH4::emptyNode); continue; } /*! 
two children are hit, push far child, and continue with closer child */ NodeRef c0 = node->child(r); const unsigned int d0 = ((unsigned int*)&tNear)[r]; r = __bscf(mask); NodeRef c1 = node->child(r); const unsigned int d1 = ((unsigned int*)&tNear)[r]; assert(c0 != BVH4::emptyNode); assert(c1 != BVH4::emptyNode); if (likely(mask == 0)) { assert(stackPtr < stackEnd); if (d0 < d1) { *stackPtr = c1; stackPtr++; cur = c0; continue; } else { *stackPtr = c0; stackPtr++; cur = c1; continue; } } assert(stackPtr < stackEnd); *stackPtr = c0; stackPtr++; assert(stackPtr < stackEnd); *stackPtr = c1; stackPtr++; /*! three children are hit */ r = __bscf(mask); cur = node->child(r); assert(cur != BVH4::emptyNode); if (likely(mask == 0)) continue; assert(stackPtr < stackEnd); *stackPtr = cur; stackPtr++; /*! four children are hit */ cur = node->child(3); assert(cur != BVH4::emptyNode); } /*! this is a leaf node */ STAT3(shadow.trav_leaves,1,1,1); size_t num; Primitive* prim = (Primitive*) cur.leaf(num); if (PrimitiveIntersector::occluded(ray,prim,num,bvh->geometry)) { ray.geomID = 0; break; } } AVX_ZERO_UPPER(); }