Esempio n. 1
0
    void BVH4Intersector4Single<types,robust,PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray)
    {
      /* load ray */
      const sseb valid0 = *valid_i;
      sse3f ray_org = ray.org;
      sse3f ray_dir = ray.dir;
      ssef ray_tnear = ray.tnear, ray_tfar  = ray.tfar;
      const sse3f rdir = rcp_safe(ray_dir);
      const sse3f org(ray_org), org_rdir = org * rdir;
      ray_tnear = select(valid0,ray_tnear,ssef(pos_inf));
      ray_tfar  = select(valid0,ray_tfar ,ssef(neg_inf));
      const ssef inf = ssef(pos_inf);
      Precalculations pre(valid0,ray);

      /* compute near/far per ray */
      sse3i nearXYZ;
      nearXYZ.x = select(rdir.x >= 0.0f,ssei(0*(int)sizeof(ssef)),ssei(1*(int)sizeof(ssef)));
      nearXYZ.y = select(rdir.y >= 0.0f,ssei(2*(int)sizeof(ssef)),ssei(3*(int)sizeof(ssef)));
      nearXYZ.z = select(rdir.z >= 0.0f,ssei(4*(int)sizeof(ssef)),ssei(5*(int)sizeof(ssef)));

      /* we have no packet implementation for OBB nodes yet */
      size_t bits = movemask(valid0);
      for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) {
	intersect1(bvh, bvh->root, i, pre, ray, ray_org, ray_dir, rdir, ray_tnear, ray_tfar, nearXYZ);
      }
      AVX_ZERO_UPPER();
    }
Esempio n. 2
0
 void FastInstanceIntersector4::intersect(sseb* valid, const UserGeometryScene::Instance* instance, Ray4& ray, size_t item)
 {
   const sse3f ray_org = ray.org;
   const sse3f ray_dir = ray.dir;
   const ssei ray_geomID = ray.geomID;
   const ssei ray_instID = ray.instID;
   const AffineSpace3faSSE world2local(instance->world2local);
   ray.org = xfmPoint (world2local,ray_org);
   ray.dir = xfmVector(world2local,ray_dir);
   ray.geomID = -1;
   ray.instID = instance->id;
   instance->object->intersect4(valid,(RTCRay4&)ray);
   ray.org = ray_org;
   ray.dir = ray_dir;
   sseb nohit = ray.geomID == ssei(-1);
   ray.geomID = select(nohit,ray_geomID,ray.geomID);
   ray.instID = select(nohit,ray_instID,ray.instID);
 }
void kdtreebenthin::draw<1>(scene& scene, ray4* r, hit4* hit4)
{

        unsigned int signx = movemask(r->D().x());
        unsigned int signy = movemask(r->D().y());
        unsigned int signz = movemask(r->D().z());

        //If the traversal direction is not same for all rays we 
        //  do a single ray traversal
        if (((signx - 1) < 14)       // sign of x is 0xF or 0
            || ((signy - 1) < 14)    // sign of y is 0xF or 0
            || ((signz - 1) < 14)) { // sign of z is 0xF or 0
                hit hit[4];
                for (int i = 0; i < 4; ++i) {
                        vec3f d(r->D().x()[i], r->D().y()[i], r->D().z()[i]);
                        ray ray(r->O(), d);
                        hit[i].prim = -1;
                        draw(scene, ray, hit[i]);
                }
                hit4->prim = ssei(hit[0].prim, hit[1].prim, hit[2].prim, hit[3].prim);
                hit4->u    = ssef(hit[0].u, hit[1].u, hit[2].u, hit[3].u);
                hit4->v    = ssef(hit[0].v, hit[1].v, hit[2].v, hit[3].v);
                return;
        }

        ssef tnear, tfar;
        _boundingBox.clip(*r, tnear, tfar);
        if (movemask(tnear >= tfar) == 0xF)
                return;

        const unsigned int dir[3][2] = {
                { signx & 1 , 1 - (signx & 1) },
                { signy & 1 , 1 - (signy & 1) },
                { signz & 1 , 1 - (signz & 1) } };

        ssef far[MAX_STACK_SIZE];
        ssef near[MAX_STACK_SIZE];
        int  nodes[MAX_STACK_SIZE];

        //push dummyNode onto stack which will cause us to exit
        nodes[0] = 0;
        far[0]  = BPRAY_INF;

        uint32_t stackptr = 1;
        kdnode* currNode = _nodes + 1;

        int activemask = 0xF;
#if MAILBOX
        static uint64_t rayid = 0;
        __sync_add_and_fetch(&rayid, 1);
#endif
        while (true) {
                if (!currNode->isLeaf()) {
                        const int axis  = currNode->getAxis();
                        const int front = currNode->getLeft() + dir[axis][0];
                        const int back  = currNode->getLeft() + dir[axis][1];
                        const ssef dist = currNode->getSplit() - r->O()[axis];
                        const ssef t    = dist * r->rcpD()[axis];

                        currNode   = _nodes + back;
                        if (!(movemask(tnear <= t) & activemask)) continue;

                        currNode   = _nodes + front;
                        if (!(movemask(tfar >= t) & activemask))  continue;

                        nodes[stackptr] = back;
                        near[stackptr]  = max(tnear, t);
                        far[stackptr]   = tfar;
                        tfar            = min(tfar, t);
                        activemask     &= movemask(tnear <= tfar);
                        ++stackptr;
                } else {
                        int primidx   = currNode->getPrimitiveOffset();
                        int primcount = currNode->getNumPrims();

                        for (int i = 0; i != primcount; ++i) {
                                int t = _prims[primidx + i];

                                //prefetch
                                int t2 = _prims[primidx + i + 1];
                                _mm_prefetch((char*)&scene._accels[t2], _MM_HINT_T0);
#if MAILBOX
                                //mailboxing
                                if (mbox.find(scene, rayid, t)) continue;
#endif
                                scene.intersect(t, *r, *hit4);
#if MAILBOX
                                mbox.add(scene, rayid, t);
#endif
                        }

                        if (movemask(tfar < r->tfar) == 0) return;
                        
                        --stackptr;
                        currNode   = nodes[stackptr] + _nodes;
                        tfar       = far[stackptr];
                        tnear      = near[stackptr];
                        activemask = movemask(tnear <= tfar);
                }
        }
}
    void BVH4Intersector4Hybrid<types,robust,PrimitiveIntersector4>::intersect(sseb* valid_i, BVH4* bvh, Ray4& ray)
    {
      /* load ray */
      const sseb valid0 = *valid_i;
      sse3f ray_org = ray.org;
      sse3f ray_dir = ray.dir;
      ssef ray_tnear = ray.tnear, ray_tfar  = ray.tfar;
      const sse3f rdir = rcp_safe(ray_dir);
      const sse3f org(ray_org), org_rdir = org * rdir;
      ray_tnear = select(valid0,ray_tnear,ssef(pos_inf));
      ray_tfar  = select(valid0,ray_tfar ,ssef(neg_inf));
      const ssef inf = ssef(pos_inf);
      Precalculations pre(valid0,ray);

      /* compute near/far per ray */
      sse3i nearXYZ;
      nearXYZ.x = select(rdir.x >= 0.0f,ssei(0*(int)sizeof(ssef)),ssei(1*(int)sizeof(ssef)));
      nearXYZ.y = select(rdir.y >= 0.0f,ssei(2*(int)sizeof(ssef)),ssei(3*(int)sizeof(ssef)));
      nearXYZ.z = select(rdir.z >= 0.0f,ssei(4*(int)sizeof(ssef)),ssei(5*(int)sizeof(ssef)));

      /* allocate stack and push root node */
      ssef    stack_near[stackSizeChunk];
      NodeRef stack_node[stackSizeChunk];
      stack_node[0] = BVH4::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* stackEnd = stack_node+stackSizeChunk;
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      ssef*    __restrict__ sptr_near = stack_near + 2;
      
      while (1) pop:
      {
        /* pop next node from stack */
        assert(sptr_node > stack_node);
        sptr_node--;
        sptr_near--;
        NodeRef cur = *sptr_node;
        if (unlikely(cur == BVH4::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }
        
        /* cull node if behind closest hit point */
        ssef curDist = *sptr_near;
        const sseb active = curDist < ray_tfar;
        if (unlikely(none(active)))
          continue;
        
        /* switch to single ray traversal */
#if !defined(__WIN32__) || defined(__X86_64__)
        size_t bits = movemask(active);
        if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) {
          for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) {
            BVH4Intersector4Single<types,robust,PrimitiveIntersector4>::intersect1(bvh, cur, i, pre, ray, ray_org, ray_dir, rdir, ray_tnear, ray_tfar, nearXYZ);
          }
          ray_tfar = min(ray_tfar,ray.tfar);
          continue;
        }
#endif

        while (1)
        {
	  /* process normal nodes */
          if (likely((types & 0x1) && cur.isNode()))
          {
	    const sseb valid_node = ray_tfar > curDist;
	    STAT3(normal.trav_nodes,1,popcnt(valid_node),4);
	    const Node* __restrict__ const node = cur.node();
	    
	    /* pop of next node */
	    assert(sptr_node > stack_node);
	    sptr_node--;
	    sptr_near--;
	    cur = *sptr_node; 
	    curDist = *sptr_near;
	    
#pragma unroll(4)
	    for (unsigned i=0; i<BVH4::N; i++)
	    {
	      const NodeRef child = node->children[i];
	      if (unlikely(child == BVH4::emptyNode)) break;
	      ssef lnearP; const sseb lhit = node->intersect<robust>(i,org,rdir,org_rdir,ray_tnear,ray_tfar,lnearP);
	      
	      /* if we hit the child we choose to continue with that child if it 
		 is closer than the current next child, or we push it onto the stack */
	      if (likely(any(lhit)))
	      {
		assert(sptr_node < stackEnd);
		assert(child != BVH4::emptyNode);
		const ssef childDist = select(lhit,lnearP,inf);
		sptr_node++;
		sptr_near++;
		
		/* push cur node onto stack and continue with hit child */
		if (any(childDist < curDist))
		{
		  *(sptr_node-1) = cur;
		  *(sptr_near-1) = curDist; 
		  curDist = childDist;
		  cur = child;
		}
		
		/* push hit child onto stack */
		else {
		  *(sptr_node-1) = child;
		  *(sptr_near-1) = childDist; 
		}
	      }     
	    }
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
          // seems to be the best place for testing utilization
          if (unlikely(popcnt(ray_tfar > curDist) <= SWITCH_THRESHOLD))
            {
              *sptr_node++ = cur;
              *sptr_near++ = curDist;
              goto pop;
            }
#endif
	  }
	  
	  /* process motion blur nodes */
          else if (likely((types & 0x10) && cur.isNodeMB()))
	  {
	    const sseb valid_node = ray_tfar > curDist;
	    STAT3(normal.trav_nodes,1,popcnt(valid_node),4);
	    const BVH4::NodeMB* __restrict__ const node = cur.nodeMB();
          
	    /* pop of next node */
	    assert(sptr_node > stack_node);
	    sptr_node--;
	    sptr_near--;
	    cur = *sptr_node; 
	    curDist = *sptr_near;
	    
#pragma unroll(4)
	    for (unsigned i=0; i<BVH4::N; i++)
	    {
	      const NodeRef child = node->child(i);
	      if (unlikely(child == BVH4::emptyNode)) break;
	      ssef lnearP; const sseb lhit = node->intersect(i,org,rdir,org_rdir,ray_tnear,ray_tfar,ray.time,lnearP);
	      
	      /* if we hit the child we choose to continue with that child if it 
		 is closer than the current next child, or we push it onto the stack */
	      if (likely(any(lhit)))
	      {
		assert(sptr_node < stackEnd);
		assert(child != BVH4::emptyNode);
		const ssef childDist = select(lhit,lnearP,inf);
		sptr_node++;
		sptr_near++;
		
		/* push cur node onto stack and continue with hit child */
		if (any(childDist < curDist))
		{
		  *(sptr_node-1) = cur;
		  *(sptr_near-1) = curDist; 
		  curDist = childDist;
		  cur = child;
		}
		
		/* push hit child onto stack */
		else {
		  *(sptr_node-1) = child;
		  *(sptr_near-1) = childDist; 
		}
	      }	      
	    }
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
          // seems to be the best place for testing utilization
          if (unlikely(popcnt(ray_tfar > curDist) <= SWITCH_THRESHOLD))
            {
              *sptr_node++ = cur;
              *sptr_near++ = curDist;
              goto pop;
            }
#endif
	  }
	  else 
	    break;
	}
Esempio n. 5
0
 INLINE operator          ssei ( void ) const { return ssei( _mm256_castsi256_si128(m256)); }
Esempio n. 6
0
  bool BVH2Traverser::occluded(const Ray& ray) const
  {
    /*! stack state */
    int stackPtr = 0;                         //!< current stack pointer
    int stack[1+BVH2<Triangle4>::maxDepth];   //!< stack of nodes that still need to get traversed
    int cur = bvh->root;                      //!< in cur we track the ID of the current node

    /*! precomputed shuffles, to switch lower and upper bounds depending on ray direction */
    const ssei identity = _mm_set_epi8(15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1, 0);
    const ssei swap     = _mm_set_epi8( 7,  6,  5,  4,  3,  2,  1,  0, 15, 14, 13, 12, 11, 10,  9, 8);
    const ssei shuffleX = ray.dir.x >= 0 ? identity : swap;
    const ssei shuffleY = ray.dir.y >= 0 ? identity : swap;
    const ssei shuffleZ = ray.dir.z >= 0 ? identity : swap;

    /*! load the ray into SIMD registers */
    const ssei pn = ssei(0x00000000,0x00000000,0x80000000,0x80000000);
    const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
    const sse3f rdir = sse3f(ssef(ray.rdir.x) ^ pn, ssef(ray.rdir.y) ^ pn, ssef(ray.rdir.z) ^ pn);
    ssef nearFar(ray.near, ray.near, -ray.far, -ray.far);
    BVH2<Triangle4>::Node* nodes = bvh->nodes;

    while (true)
    {
      /*! this is an inner node */
      while (__builtin_expect(cur >= 0, true))
      {
        /*! Single ray intersection with box of both children. See bvh2.h for node layout. */
        const BVH2<Triangle4>::Node& node = bvh->node(nodes,cur);
        const ssef tNearFarX = (shuffle8(node.lower_upper_x,shuffleX) + norg.x) * rdir.x;
        const ssef tNearFarY = (shuffle8(node.lower_upper_y,shuffleY) + norg.y) * rdir.y;
        const ssef tNearFarZ = (shuffle8(node.lower_upper_z,shuffleZ) + norg.z) * rdir.z;
        const ssef tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,nearFar) ^ pn;
        const sseb lrhit = tNearFar <= shuffle8(tNearFar,swap);

        /*! if two children hit, push far node onto stack and continue with closer node */
        if (__builtin_expect(lrhit[0] != 0 && lrhit[1] != 0, true)) {
          if (tNearFar[0] < tNearFar[1]) { stack[stackPtr++] = node.child[1]; cur = node.child[0]; }
          else                           { stack[stackPtr++] = node.child[0]; cur = node.child[1]; }
        }

        /*! if one child hit, continue with that child */
        else {
          if      (lrhit[0] != 0) cur = node.child[0];
          else if (lrhit[1] != 0) cur = node.child[1];
          else goto pop_node;
        }
      }

      /*! leaf node, intersect all triangles */
      {
        cur ^= 0x80000000;
        const size_t ofs = size_t(cur) >> 5;
        const size_t num = size_t(cur) & 0x1F;
        for (size_t i=ofs; i<ofs+num; i++)
          if (bvh->triangles[i].occluded(ray))
            return true;
      }

      /*! pop next node from stack */
pop_node:
      if (__builtin_expect(stackPtr == 0, false)) break;
      cur = stack[--stackPtr];
    }
    return false;
  }
Esempio n. 7
0
  size_t BVH4MB::rotate(Base* nodeID, size_t depth)
  {
    /*! nothing to rotate if we reached a leaf node. */
    if (nodeID->isLeaf()) return 0;
    Node* parent = nodeID->node();

    /*! rotate all children first */
    ssei cdepth;
    for (size_t c=0; c<4; c++)
      cdepth[c] = (int)rotate(parent->child[c],depth+1);

    /* compute current area of all children */
    ssef sizeX = parent->upper_x-parent->lower_x;
    ssef sizeY = parent->upper_y-parent->lower_y;
    ssef sizeZ = parent->upper_z-parent->lower_z;
    ssef childArea = sizeX*(sizeY + sizeZ) + sizeY*sizeZ;

    /*! transpose node bounds */
    ssef plower0,plower1,plower2,plower3; transpose(parent->lower_x,parent->lower_y,parent->lower_z,ssef(zero),plower0,plower1,plower2,plower3);
    ssef pupper0,pupper1,pupper2,pupper3; transpose(parent->upper_x,parent->upper_y,parent->upper_z,ssef(zero),pupper0,pupper1,pupper2,pupper3);
    BBox<ssef> other0(plower0,pupper0), other1(plower1,pupper1), other2(plower2,pupper2), other3(plower3,pupper3);

    /*! Find best rotation. We pick a target child of a first child,
      and swap this with an other child. We perform the best such
      swap. */
    float bestCost = pos_inf;
    int bestChild = -1, bestTarget = -1, bestOther = -1;
    for (size_t c=0; c<4; c++)
    {
      /*! ignore leaf nodes as we cannot descent into */
      if (parent->child[c]->isLeaf()) continue;
      Node* child = parent->child[c]->node();

      /*! transpose child bounds */
      ssef clower0,clower1,clower2,clower3; transpose(child->lower_x,child->lower_y,child->lower_z,ssef(zero),clower0,clower1,clower2,clower3);
      ssef cupper0,cupper1,cupper2,cupper3; transpose(child->upper_x,child->upper_y,child->upper_z,ssef(zero),cupper0,cupper1,cupper2,cupper3);
      BBox<ssef> target0(clower0,cupper0), target1(clower1,cupper1), target2(clower2,cupper2), target3(clower3,cupper3);

      /*! put other0 at each target position */
      float cost00 = halfArea3f(merge(other0 ,target1,target2,target3));
      float cost01 = halfArea3f(merge(target0,other0 ,target2,target3));
      float cost02 = halfArea3f(merge(target0,target1,other0 ,target3));
      float cost03 = halfArea3f(merge(target0,target1,target2,other0 ));
      ssef cost0 = ssef(cost00,cost01,cost02,cost03);
      ssef min0 = vreduce_min(cost0);
      int pos0 = (int)__bsf(movemask(min0 == cost0));

      /*! put other1 at each target position */
      float cost10 = halfArea3f(merge(other1 ,target1,target2,target3));
      float cost11 = halfArea3f(merge(target0,other1 ,target2,target3));
      float cost12 = halfArea3f(merge(target0,target1,other1 ,target3));
      float cost13 = halfArea3f(merge(target0,target1,target2,other1 ));
      ssef cost1 = ssef(cost10,cost11,cost12,cost13);
      ssef min1 = vreduce_min(cost1);
      int pos1 = (int)__bsf(movemask(min1 == cost1));

      /*! put other2 at each target position */
      float cost20 = halfArea3f(merge(other2 ,target1,target2,target3));
      float cost21 = halfArea3f(merge(target0,other2 ,target2,target3));
      float cost22 = halfArea3f(merge(target0,target1,other2 ,target3));
      float cost23 = halfArea3f(merge(target0,target1,target2,other2 ));
      ssef cost2 = ssef(cost20,cost21,cost22,cost23);
      ssef min2 = vreduce_min(cost2);
      int pos2 = (int)__bsf(movemask(min2 == cost2));

      /*! put other3 at each target position */
      float cost30 = halfArea3f(merge(other3 ,target1,target2,target3));
      float cost31 = halfArea3f(merge(target0,other3 ,target2,target3));
      float cost32 = halfArea3f(merge(target0,target1,other3 ,target3));
      float cost33 = halfArea3f(merge(target0,target1,target2,other3 ));
      ssef cost3 = ssef(cost30,cost31,cost32,cost33);
      ssef min3 = vreduce_min(cost3);
      int pos3 = (int)__bsf(movemask(min3 == cost3));

      /*! find best other child */
      ssef otherCost = ssef(extract<0>(min0),extract<0>(min1),extract<0>(min2),extract<0>(min3));
      int pos[4] = { pos0,pos1,pos2,pos3 };
      sseb valid = ssei(int(depth+1))+cdepth <= ssei(maxDepth); // only select swaps that fulfill depth constraints
      if (none(valid)) continue;
      
      size_t n = select_min(valid,otherCost);
      float cost = otherCost[n]-childArea[c]; //< increasing the original child bound is bad, decreasing good

      /*! accept a swap when it reduces cost and is not swapping a node with itself */
      if (cost < bestCost && n != c) {
        bestCost = cost;
        bestChild = (int)c;
        bestOther = (int)n;
        bestTarget = pos[n];
      }
    }

    /*! if we did not find a swap that improves the SAH then do nothing */
    if (bestCost >= 0) return 1+reduce_max(cdepth);

    /*! perform the best found tree rotation */
    Node* child = parent->child[bestChild]->node();
    swap(parent,bestOther,child,bestTarget);
    parent->lower_x[bestChild] = reduce_min(child->lower_x);
    parent->lower_y[bestChild] = reduce_min(child->lower_y);
    parent->lower_z[bestChild] = reduce_min(child->lower_z);
    parent->upper_x[bestChild] = reduce_max(child->upper_x);
    parent->upper_y[bestChild] = reduce_max(child->upper_y);
    parent->upper_z[bestChild] = reduce_max(child->upper_z);
    parent->lower_dx[bestChild] = reduce_min(child->lower_dx);
    parent->lower_dy[bestChild] = reduce_min(child->lower_dy);
    parent->lower_dz[bestChild] = reduce_min(child->lower_dz);
    parent->upper_dx[bestChild] = reduce_max(child->upper_dx);
    parent->upper_dy[bestChild] = reduce_max(child->upper_dy);
    parent->upper_dz[bestChild] = reduce_max(child->upper_dz);

    /*! This returned depth is conservative as the child that was
     *  pulled up in the tree could have been on the critical path. */
    cdepth[bestOther]++; // bestOther was pushed down one level
    return 1+reduce_max(cdepth); 
  }
Esempio n. 8
0
  void BVH2Intersector<TriangleIntersector>::intersect(const Ray& ray, Hit& hit) const
  {
    AVX_ZERO_UPPER();
    STAT3(normal.travs,1,1,1);

    struct StackItem {
      Base* ptr;   //!< node pointer
      float dist;  //!< distance of node
    };

    /*! stack state */
    StackItem stack[1+BVH2::maxDepth];  //!< stack of nodes that still need to get traversed
    StackItem* stackPtr = stack;        //!< current stack pointer
    Base* cur = bvh->root;              //!< in cur we track the ID of the current node

    /*! precomputed shuffles, to switch lower and upper bounds depending on ray direction */
    const ssei identity = _mm_set_epi8(15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1, 0);
    const ssei swap     = _mm_set_epi8( 7,  6,  5,  4,  3,  2,  1,  0, 15, 14, 13, 12, 11, 10,  9, 8);
    const ssei shuffleX = ray.dir.x >= 0 ? identity : swap;
    const ssei shuffleY = ray.dir.y >= 0 ? identity : swap;
    const ssei shuffleZ = ray.dir.z >= 0 ? identity : swap;

    /*! load the ray into SIMD registers */
    const ssei pn = ssei(0x00000000,0x00000000,0x80000000,0x80000000);
    const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
    const sse3f rdir = sse3f(ssef(ray.rdir.x) ^ pn, ssef(ray.rdir.y) ^ pn, ssef(ray.rdir.z) ^ pn);
    ssef nearFar(ray.near, ray.near, -ray.far, -ray.far);
    hit.t = min(hit.t,ray.far);

    while (true)
    {
      /*! downtraversal loop */
      while (likely(cur->isNode()))
      {
        /*! single ray intersection with box of both children. */
        const Node* node = cur->node();
        const ssef tNearFarX = (shuffle8(node->lower_upper_x,shuffleX) + norg.x) * rdir.x;
        const ssef tNearFarY = (shuffle8(node->lower_upper_y,shuffleY) + norg.y) * rdir.y;
        const ssef tNearFarZ = (shuffle8(node->lower_upper_z,shuffleZ) + norg.z) * rdir.z;
        const ssef tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,nearFar) ^ pn;
        const sseb lrhit = tNearFar <= shuffle8(tNearFar,swap);

        /*! if two children hit, push far node onto stack and continue with closer node */
        if (likely(lrhit[0] != 0 && lrhit[1] != 0)) {
          if (likely(tNearFar[0] < tNearFar[1])) { 
            stackPtr->ptr = node->child[1]; 
            stackPtr->dist = tNearFar[1]; 
            cur = node->child[0]; 
            stackPtr++; 
          }
          else { 
            stackPtr->ptr = node->child[0]; 
            stackPtr->dist = tNearFar[0]; 
            cur = node->child[1]; 
            stackPtr++; 
          }
        }

        /*! if one child hit, continue with that child */
        else {
          if      (likely(lrhit[0] != 0)) cur = node->child[0];
          else if (likely(lrhit[1] != 0)) cur = node->child[1];
          else goto pop_node;
        }
      }

      /*! leaf node, intersect all triangles */
      {
        STAT3(shadow.trav_leaves,1,1,1);
        size_t num; Triangle* tri = (Triangle*) cur->leaf(num);
        for (size_t i=0; i<num; i++)
          TriangleIntersector::intersect(ray,hit,tri[i],bvh->vertices);
        nearFar = shuffle<0,1,2,3>(nearFar,-hit.t);
      }

      /*! pop next node from stack */
pop_node:
      if (unlikely(stackPtr == stack)) break;
      --stackPtr;
      cur = stackPtr->ptr;
      if (unlikely(stackPtr->dist > hit.t)) goto pop_node;
    }
    AVX_ZERO_UPPER();
  }
Esempio n. 9
0
  bool BVH2Intersector<TriangleIntersector>::occluded(const Ray& ray) const
  {
    AVX_ZERO_UPPER();

    /*! stack state */
    Base* stack[1+BVH2::maxDepth];   //!< stack of nodes that still need to get traversed
    Base** stackPtr = stack;         //!< current stack pointer
    Base* cur = bvh->root;           //!< in cur we track the ID of the current node

    /*! precomputed shuffles, to switch lower and upper bounds depending on ray direction */
    const ssei identity = _mm_set_epi8(15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1, 0);
    const ssei swap     = _mm_set_epi8( 7,  6,  5,  4,  3,  2,  1,  0, 15, 14, 13, 12, 11, 10,  9, 8);
    const ssei shuffleX = ray.dir.x >= 0 ? identity : swap;
    const ssei shuffleY = ray.dir.y >= 0 ? identity : swap;
    const ssei shuffleZ = ray.dir.z >= 0 ? identity : swap;

    /*! load the ray into SIMD registers */
    const ssei pn = ssei(0x00000000,0x00000000,0x80000000,0x80000000);
    const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
    const sse3f rdir = sse3f(ssef(ray.rdir.x) ^ pn, ssef(ray.rdir.y) ^ pn, ssef(ray.rdir.z) ^ pn);
    ssef nearFar(ray.near, ray.near, -ray.far, -ray.far);

    while (true)
    {
      /*! this is an inner node */
      while (likely(cur->isNode()))
      {
        /*! Single ray intersection with box of both children. See bvh2i.h for node layout. */
        const Node* node = cur->node();
        const ssef tNearFarX = (shuffle8(node->lower_upper_x,shuffleX) + norg.x) * rdir.x;
        const ssef tNearFarY = (shuffle8(node->lower_upper_y,shuffleY) + norg.y) * rdir.y;
        const ssef tNearFarZ = (shuffle8(node->lower_upper_z,shuffleZ) + norg.z) * rdir.z;
        const ssef tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,nearFar) ^ pn;
        const sseb lrhit = tNearFar <= shuffle8(tNearFar,swap);

        /*! if two children hit, push far node onto stack and continue with closer node */
        if (likely(lrhit[0] != 0 && lrhit[1] != 0)) {
          *stackPtr++ = node->child[0]; cur = node->child[1];
        }

        /*! if one child hit, continue with that child */
        else {
          if      (lrhit[0] != 0) cur = node->child[0];
          else if (lrhit[1] != 0) cur = node->child[1];
          else goto pop_node;
        }
      }

      /*! leaf node, intersect all triangles */
      {
        STAT3(shadow.trav_leaves,1,1,1);
        size_t num; Triangle* tri = (Triangle*) cur->leaf(num);
        for (size_t i=0; i<num; i++)
          if (TriangleIntersector::occluded(ray,tri[i],bvh->vertices)) {
            AVX_ZERO_UPPER();
            return true;
          }
      }

      /*! pop next node from stack */
pop_node:
      if (unlikely(stackPtr == stack)) break;
      cur = *(--stackPtr);
    }
    AVX_ZERO_UPPER();
    return false;
  }