示例#1
0
文件: hiz.cpp 项目: binhpt/point-frag
  // We create packets and directly fill each zBuffer tile. Note that we
  // really store t values
  void TaskRayTraceHiZ::run(size_t taskID)
  {
    const uint32 taskX = taskID % this->taskXNum;
    const uint32 taskY = taskID / this->taskXNum;
    const uint32 startX = taskX * this->width;
    const uint32 startY = taskY * this->height;
    const uint32 endX = startX + this->width;
    const uint32 endY = startY + this->height;
    uint32 tileY = startY / HiZ::Tile::height;

    for (uint32 y = startY; y < endY; y += RayPacket::height, ++tileY) {
      uint32 tileX = startX / HiZ::Tile::width;
      for (uint32 x = startX; x < endX; x += RayPacket::width, ++tileX) {
        RayPacket pckt;
        PacketHit hit;
        gen.generate(pckt, x, y);
        intersector->traverse(pckt, hit);
        ssef zmin(inf), zmax(neg_inf);
        const uint32 tileID = tileX + tileY * zBuffer->tileXNum;
        PF_ASSERT(tileID < zBuffer->tileNum);
        HiZ::Tile &tile = zBuffer->tiles[tileID];
        for (uint32 chunkID = 0; chunkID < HiZ::Tile::chunkNum; ++chunkID) {
          //const ssef t = hit.t[chunkID];
          const ssef t = hit.t[chunkID] *dot(sse3f(view.x,view.y,view.z), pckt.dir[chunkID]);
          tile.z[chunkID] = t;
          zmin = min(zmin, t);
          zmax = max(zmax, t);
        }
        tile.zmin = reduce_min(zmin)[0];
        tile.zmax = reduce_max(zmax)[0];
      }
    }
  }
示例#2
0
文件: hiz.cpp 项目: binhpt/point-frag
 // Builds the frustum from a camera and the HiZ buffer it will be tested
 // against: copies the camera frame, derives the sine / inverse-tangent terms
 // from the field of view and aspect ratio, and caches two constants computed
 // from the tile grid dimensions of the HiZ buffer.
 PerspectiveFrustum::PerspectiveFrustum(const RTCamera &cam, Ref<HiZ> hiz)
   : hiz(hiz)
 {
   // Camera origin and view direction, once as a 4-float vector (w = 0) ...
   this->org_aos  = ssef(cam.org.x, cam.org.y, cam.org.z, 0.f);
   this->view_aos = ssef(cam.view.x, cam.view.y, cam.view.z, 0.f);
   // ... and once as sse3f
   this->org   = sse3f(cam.org.x, cam.org.y, cam.org.z);
   this->view  = sse3f(cam.view.x, cam.view.y, cam.view.z);

   // Camera axes are stored normalized
   this->xAxis = sse3f(cam.xAxis.x, cam.xAxis.y, cam.xAxis.z);
   this->xAxis = normalize(this->xAxis);
   this->zAxis = sse3f(cam.zAxis.x, cam.zAxis.y, cam.zAxis.z);
   this->zAxis = normalize(this->zAxis);

   // fov * pi / 360: half of the field of view in radians, assuming cam.fov
   // is expressed in degrees (TODO confirm against RTCamera)
   const auto halfFov = cam.fov * float(pi) / 360.f;
   this->yMaxSinAngle = sin(halfFov);
   this->xMaxSinAngle = cam.ratio * this->yMaxSinAngle;
   this->yMaxInvTanAngle = 1.f / tan(halfFov);
   this->xMaxInvTanAngle = this->yMaxInvTanAngle / cam.ratio;

   // Half of the tile grid size, laid out as (x, x, y, y)
   const float halfTilesX = float(hiz->tileXNum) * 0.5f;
   const float halfTilesY = float(hiz->tileYNum) * 0.5f;
   this->windowing = ssef(halfTilesX, halfTilesX, halfTilesY, halfTilesY);

   // Largest tile index per axis, laid out as (x, x, y, y)
   const float lastTileX = float(hiz->tileXNum-1);
   const float lastTileY = float(hiz->tileYNum-1);
   this->hizExtent = ssef(lastTileX, lastTileX, lastTileY, lastTileY);
 }
    /* Occlusion query for a ray packet (Ray4) against a BVH4.
     *
     * valid_i: mask selecting the lanes of `ray` that take part in the query.
     * bvh:     hierarchy to traverse, starting at bvh->root.
     * ray:     the ray packet; the only field written here is ray.geomID,
     *          through the masked store at the end of the function.
     *
     * Lane state is tracked in `terminated`. It starts as the inactive lanes
     * (!valid) and gains a lane whenever occluded1() or
     * PrimitiveIntersector4::occluded() reports that lane. Terminated lanes
     * get ray_tfar = -inf, so the culling test and the box tests no longer
     * pass for them. Traversal ends when every lane is terminated or when the
     * sentinel at the bottom of the stack is popped. */
    void BVH4Intersector4Hybrid<PrimitiveIntersector4>::occluded(sseb* valid_i, BVH4* bvh, Ray4& ray)
    {
      /* load ray */
      const sseb valid = *valid_i;
      /* inactive lanes are treated as already terminated */
      sseb terminated = !valid;
      sse3f ray_org = ray.org, ray_dir = ray.dir;
      ssef ray_tnear = ray.tnear, ray_tfar  = ray.tfar;
#if defined(__FIX_RAYS__)
      /* optional input sanitizing: clamp origin and direction to
         +-0.1*FLT_MAX and bound tnear from below / tfar from above */
      const ssef float_range = 0.1f*FLT_MAX;
      ray_org = clamp(ray_org,sse3f(-float_range),sse3f(+float_range));
      ray_dir = clamp(ray_dir,sse3f(-float_range),sse3f(+float_range));
      ray_tnear = max(ray_tnear,FLT_MIN); 
      ray_tfar  = min(ray_tfar,float(inf)); 
#endif
      /* reciprocal direction for the slab tests; org_rdir is only read by the
         __AVX2__ variant of the test below */
      const sse3f rdir = rcp_safe(ray_dir);
      const sse3f org(ray_org), org_rdir = org * rdir;
      /* inactive lanes get the empty interval [+inf,-inf] */
      ray_tnear = select(valid,ray_tnear,ssef(pos_inf));
      ray_tfar  = select(valid,ray_tfar ,ssef(neg_inf));
      /* from here on `inf` names this local vector */
      const ssef inf = ssef(pos_inf);
      
      /* allocate stack and push root node */
      ssef    stack_near[stackSizeChunk];
      NodeRef stack_node[stackSizeChunk];
      /* slot 0 is a sentinel marking the bottom of the stack */
      stack_node[0] = BVH4::invalidNode;
      stack_near[0] = inf;
      stack_node[1] = bvh->root;
      stack_near[1] = ray_tnear; 
      NodeRef* stackEnd = stack_node+stackSizeChunk;
      /* both pointers always move together: node and its per-lane distance */
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      ssef*    __restrict__ sptr_near = stack_near + 2;
      
      while (1)
      {
        /* pop next node from stack */
        assert(sptr_node > stack_node);
        sptr_node--;
        sptr_near--;
        NodeRef curNode = *sptr_node;
        if (unlikely(curNode == BVH4::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }

        /* cull node if behind closest hit point */
        ssef curDist = *sptr_near;
        const sseb active = curDist < ray_tfar;
        if (unlikely(none(active))) 
          continue;
        
        /* switch to single ray traversal */
        /* taken when at most SWITCH_THRESHOLD lanes are active for this node:
           every set bit of the lane mask is handed to occluded1(), which
           continues from curNode for that single lane */
#if !defined(__WIN32__) || defined(__X86_64__)
        size_t bits = movemask(active);
        if (unlikely(__popcnt(bits) <= SWITCH_THRESHOLD)) {
          for (size_t i=__bsf(bits); bits!=0; bits=__btc(bits,i), i=__bsf(bits)) {
            if (occluded1(bvh,curNode,i,ray,ray_org,ray_dir,rdir,ray_tnear,ray_tfar))
              terminated[i] = -1;
          }
          if (all(terminated)) break;
          ray_tfar = select(terminated,ssef(neg_inf),ray_tfar);
          continue;
        }
#endif

        /* packet traversal: descend until curNode is a leaf. The check after
           this loop shows that the sentinel popped from the stack can also
           end it. */
        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(curNode.isLeaf()))
            break;
          
          /* valid_node is only consumed by the statistics macro */
          const sseb valid_node = ray_tfar > curDist;
          STAT3(shadow.trav_nodes,1,popcnt(valid_node),4);
          const Node* __restrict__ const node = curNode.node();
          
          /* pop of next node */
          /* the children of `node` are compared against this popped entry:
             a hit child either takes its place or is pushed */
          assert(sptr_node > stack_node);
          sptr_node--;
          sptr_near--;
          curNode = *sptr_node;
          curDist = *sptr_near;
          
#pragma unroll(4)
          for (unsigned i=0; i<4; i++)
          {
            const NodeRef child = node->children[i];
            /* the scan stops at the first empty child slot */
            if (unlikely(child == BVH4::emptyNode)) break;
            
            /* per-lane distances to the six bounding planes of child i;
               the __AVX2__ variant uses msub with the precomputed org*rdir
               (msub is defined elsewhere - presumably a fused
               multiply-subtract) */
#if defined(__AVX2__)
            const ssef lclipMinX = msub(node->lower_x[i],rdir.x,org_rdir.x);
            const ssef lclipMinY = msub(node->lower_y[i],rdir.y,org_rdir.y);
            const ssef lclipMinZ = msub(node->lower_z[i],rdir.z,org_rdir.z);
            const ssef lclipMaxX = msub(node->upper_x[i],rdir.x,org_rdir.x);
            const ssef lclipMaxY = msub(node->upper_y[i],rdir.y,org_rdir.y);
            const ssef lclipMaxZ = msub(node->upper_z[i],rdir.z,org_rdir.z);
#else
            const ssef lclipMinX = (node->lower_x[i] - org.x) * rdir.x;
            const ssef lclipMinY = (node->lower_y[i] - org.y) * rdir.y;
            const ssef lclipMinZ = (node->lower_z[i] - org.z) * rdir.z;
            const ssef lclipMaxX = (node->upper_x[i] - org.x) * rdir.x;
            const ssef lclipMaxY = (node->upper_y[i] - org.y) * rdir.y;
            const ssef lclipMaxZ = (node->upper_z[i] - org.z) * rdir.z;
#endif
    
            /* lnearP: largest of the three per-axis entry distances,
               lfarP: smallest of the three per-axis exit distances.
               A lane hits when the interval clipped to [tnear,tfar] is not
               empty. The __SSE4_1__ build uses the mini/maxi helpers in
               place of min/max. */
#if defined(__SSE4_1__)
            const ssef lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
            const ssef lfarP  = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
            const sseb lhit   = maxi(lnearP,ray_tnear) <= mini(lfarP,ray_tfar);      
#else
            const ssef lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const ssef lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const sseb lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);      
#endif
            
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              assert(sptr_node < stackEnd);
              assert(child != BVH4::emptyNode);
              /* lanes that miss the child get distance +inf */
              const ssef childDist = select(lhit,lnearP,inf);
              /* reserve one slot; it is filled by one of the branches below */
              sptr_node++;
              sptr_near++;
              
              /* push cur node onto stack and continue with hit child */
              if (any(childDist < curDist))
              {
                *(sptr_node-1) = curNode;
                *(sptr_near-1) = curDist; 
                curDist = childDist;
                curNode = child;
              }
              
              /* push hit child onto stack */
              else {
                *(sptr_node-1) = child;
                *(sptr_near-1) = childDist; 
              }
            }	      
          }
        }
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4::invalidNode)) {
          assert(sptr_node == stack_node);
          break;
        }
        
        /* intersect leaf */
        /* valid_leaf is only consumed by the statistics macro */
        const sseb valid_leaf = ray_tfar > curDist;
        STAT3(shadow.trav_leaves,1,popcnt(valid_leaf),4);
        size_t items; const Primitive* prim = (Primitive*) curNode.leaf(items);
        /* only lanes not yet terminated are tested; lanes reported by the
           primitive test become terminated */
        terminated |= PrimitiveIntersector4::occluded(!terminated,ray,prim,items,bvh->geometry);
        if (all(terminated)) break;
        ray_tfar = select(terminated,ssef(neg_inf),ray_tfar);
      }
      /* masked store into ray.geomID for the lanes that were valid and ended
         up terminated (store4i is defined elsewhere - presumably it writes
         the value 0 into the selected lanes; confirm) */
      store4i(valid & terminated,&ray.geomID,0);
      AVX_ZERO_UPPER();
    }
示例#4
0
  bool BVH2Traverser::occluded(const Ray& ray) const
  {
    /*! stack state */
    int stackPtr = 0;                         //!< current stack pointer
    int stack[1+BVH2<Triangle4>::maxDepth];   //!< stack of nodes that still need to get traversed
    int cur = bvh->root;                      //!< in cur we track the ID of the current node

    /*! precomputed shuffles, to switch lower and upper bounds depending on ray direction */
    const ssei identity = _mm_set_epi8(15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1, 0);
    const ssei swap     = _mm_set_epi8( 7,  6,  5,  4,  3,  2,  1,  0, 15, 14, 13, 12, 11, 10,  9, 8);
    const ssei shuffleX = ray.dir.x >= 0 ? identity : swap;
    const ssei shuffleY = ray.dir.y >= 0 ? identity : swap;
    const ssei shuffleZ = ray.dir.z >= 0 ? identity : swap;

    /*! load the ray into SIMD registers */
    const ssei pn = ssei(0x00000000,0x00000000,0x80000000,0x80000000);
    const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z);
    const sse3f rdir = sse3f(ssef(ray.rdir.x) ^ pn, ssef(ray.rdir.y) ^ pn, ssef(ray.rdir.z) ^ pn);
    ssef nearFar(ray.near, ray.near, -ray.far, -ray.far);
    BVH2<Triangle4>::Node* nodes = bvh->nodes;

    while (true)
    {
      /*! this is an inner node */
      while (__builtin_expect(cur >= 0, true))
      {
        /*! Single ray intersection with box of both children. See bvh2.h for node layout. */
        const BVH2<Triangle4>::Node& node = bvh->node(nodes,cur);
        const ssef tNearFarX = (shuffle8(node.lower_upper_x,shuffleX) + norg.x) * rdir.x;
        const ssef tNearFarY = (shuffle8(node.lower_upper_y,shuffleY) + norg.y) * rdir.y;
        const ssef tNearFarZ = (shuffle8(node.lower_upper_z,shuffleZ) + norg.z) * rdir.z;
        const ssef tNearFar = max(tNearFarX,tNearFarY,tNearFarZ,nearFar) ^ pn;
        const sseb lrhit = tNearFar <= shuffle8(tNearFar,swap);

        /*! if two children hit, push far node onto stack and continue with closer node */
        if (__builtin_expect(lrhit[0] != 0 && lrhit[1] != 0, true)) {
          if (tNearFar[0] < tNearFar[1]) { stack[stackPtr++] = node.child[1]; cur = node.child[0]; }
          else                           { stack[stackPtr++] = node.child[0]; cur = node.child[1]; }
        }

        /*! if one child hit, continue with that child */
        else {
          if      (lrhit[0] != 0) cur = node.child[0];
          else if (lrhit[1] != 0) cur = node.child[1];
          else goto pop_node;
        }
      }

      /*! leaf node, intersect all triangles */
      {
        cur ^= 0x80000000;
        const size_t ofs = size_t(cur) >> 5;
        const size_t num = size_t(cur) & 0x1F;
        for (size_t i=ofs; i<ofs+num; i++)
          if (bvh->triangles[i].occluded(ray))
            return true;
      }

      /*! pop next node from stack */
pop_node:
      if (__builtin_expect(stackPtr == 0, false)) break;
      cur = stack[--stackPtr];
    }
    return false;
  }
示例#5
0
  /*! Intersects a single ray with the BVH2. Every triangle of each visited
   *  leaf is passed to TriangleIntersector::intersect together with `hit`.
   *  hit.t is first clamped to ray.far, and after each leaf it is fed back
   *  into the ray range used by the box tests and to discard deferred
   *  subtrees whose box entry distance exceeds it. */
  void BVH2Intersector<TriangleIntersector>::intersect(const Ray& ray, Hit& hit) const
  {
    AVX_ZERO_UPPER();
    STAT3(normal.travs,1,1,1);

    /*! a deferred subtree together with the entry distance of its box */
    struct Pending {
      Base* node;   //!< root of the deferred subtree
      float tnear;  //!< distance at which the ray enters its box
    };

    /*! traversal state */
    Pending deferred[1+BVH2::maxDepth];  //!< subtrees still to be visited
    Pending* top = deferred;             //!< one past the last deferred entry
    Base* cur = bvh->root;               //!< node currently visited

    /*! byte shuffles: keepOrder leaves a register untouched, swapHalves
     *  exchanges its two 64-bit halves; the ray direction sign picks one per axis */
    const ssei keepOrder  = _mm_set_epi8(15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1, 0);
    const ssei swapHalves = _mm_set_epi8( 7,  6,  5,  4,  3,  2,  1,  0, 15, 14, 13, 12, 11, 10,  9, 8);
    const ssei orderX = ray.dir.x >= 0 ? keepOrder : swapHalves;
    const ssei orderY = ray.dir.y >= 0 ? keepOrder : swapHalves;
    const ssei orderZ = ray.dir.z >= 0 ? keepOrder : swapHalves;

    /*! ray in SIMD form; signMask flips the sign of the two upper lanes so
     *  that one max() yields the entry distances (lanes 0,1) and the negated
     *  exit distances (lanes 2,3) */
    const ssei signMask = ssei(0x00000000,0x00000000,0x80000000,0x80000000);
    const sse3f negOrg(-ray.org.x,-ray.org.y,-ray.org.z);
    const sse3f signedRdir = sse3f(ssef(ray.rdir.x) ^ signMask, ssef(ray.rdir.y) ^ signMask, ssef(ray.rdir.z) ^ signMask);
    ssef rayRange(ray.near, ray.near, -ray.far, -ray.far);
    hit.t = min(hit.t,ray.far);

    while (true)
    {
      /*! descend while the current entry is an inner node */
      while (likely(cur->isNode()))
      {
        /*! test the ray against the boxes of both children at once */
        const Node* inner = cur->node();
        const ssef slabX = (shuffle8(inner->lower_upper_x,orderX) + negOrg.x) * signedRdir.x;
        const ssef slabY = (shuffle8(inner->lower_upper_y,orderY) + negOrg.y) * signedRdir.y;
        const ssef slabZ = (shuffle8(inner->lower_upper_z,orderZ) + negOrg.z) * signedRdir.z;
        const ssef entryExit = max(slabX,slabY,slabZ,rayRange) ^ signMask;
        const sseb childHit = entryExit <= shuffle8(entryExit,swapHalves);
        const bool hitsFirst  = childHit[0] != 0;
        const bool hitsSecond = childHit[1] != 0;

        /*! both boxes hit: defer the farther child (with its entry
         *  distance) and visit the closer one */
        if (likely(hitsFirst && hitsSecond)) {
          const size_t farSide = likely(entryExit[0] < entryExit[1]) ? 1 : 0;
          top->node = inner->child[farSide];
          top->tnear = entryExit[farSide];
          cur = inner->child[1-farSide];
          top++;
        }
        /*! a single box hit: follow it */
        else if (likely(hitsFirst))  cur = inner->child[0];
        else if (likely(hitsSecond)) cur = inner->child[1];
        /*! nothing hit: take the next deferred subtree */
        else goto next_node;
      }

      /*! leaf: intersect every triangle, then shrink the far end of the ray
       *  range (upper two lanes) to the current hit distance */
      {
        /* NOTE(review): the leaf counter is recorded under `shadow` although
           the entry counter of this function is `normal.travs` - confirm
           this is intended. */
        STAT3(shadow.trav_leaves,1,1,1);
        size_t triNum; Triangle* tris = (Triangle*) cur->leaf(triNum);
        for (size_t k=0; k<triNum; k++)
          TriangleIntersector::intersect(ray,hit,tris[k],bvh->vertices);
        rayRange = shuffle<0,1,2,3>(rayRange,-hit.t);
      }

      /*! take the most recently deferred subtree; entries whose box starts
       *  beyond the current hit distance are skipped */
next_node:
      if (unlikely(top == deferred)) break;
      --top;
      cur = top->node;
      if (unlikely(top->tnear > hit.t)) goto next_node;
    }
    AVX_ZERO_UPPER();
  }
示例#6
0
  /*! Any-hit query of a single ray against the BVH2. Returns true as soon as
   *  TriangleIntersector::occluded reports a triangle of a visited leaf,
   *  false once the traversal stack has been emptied. AVX_ZERO_UPPER() is
   *  issued on entry and before every return. */
  bool BVH2Intersector<TriangleIntersector>::occluded(const Ray& ray) const
  {
    AVX_ZERO_UPPER();

    /*! traversal state */
    Base* deferred[1+BVH2::maxDepth];   //!< subtrees still to be visited
    Base** top = deferred;              //!< one past the last deferred entry
    Base* cur = bvh->root;              //!< node currently visited

    /*! byte shuffles: keepOrder leaves a register untouched, swapHalves
     *  exchanges its two 64-bit halves; the ray direction sign picks one per axis */
    const ssei keepOrder  = _mm_set_epi8(15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1, 0);
    const ssei swapHalves = _mm_set_epi8( 7,  6,  5,  4,  3,  2,  1,  0, 15, 14, 13, 12, 11, 10,  9, 8);
    const ssei orderX = ray.dir.x >= 0 ? keepOrder : swapHalves;
    const ssei orderY = ray.dir.y >= 0 ? keepOrder : swapHalves;
    const ssei orderZ = ray.dir.z >= 0 ? keepOrder : swapHalves;

    /*! ray in SIMD form; signMask flips the sign of the two upper lanes so
     *  that one max() yields the entry distances (lanes 0,1) and the negated
     *  exit distances (lanes 2,3) */
    const ssei signMask = ssei(0x00000000,0x00000000,0x80000000,0x80000000);
    const sse3f negOrg(-ray.org.x,-ray.org.y,-ray.org.z);
    const sse3f signedRdir = sse3f(ssef(ray.rdir.x) ^ signMask, ssef(ray.rdir.y) ^ signMask, ssef(ray.rdir.z) ^ signMask);
    ssef rayRange(ray.near, ray.near, -ray.far, -ray.far);

    while (true)
    {
      /*! descend while the current entry is an inner node */
      while (likely(cur->isNode()))
      {
        /*! test the ray against the boxes of both children at once (node layout: see bvh2i.h) */
        const Node* inner = cur->node();
        const ssef slabX = (shuffle8(inner->lower_upper_x,orderX) + negOrg.x) * signedRdir.x;
        const ssef slabY = (shuffle8(inner->lower_upper_y,orderY) + negOrg.y) * signedRdir.y;
        const ssef slabZ = (shuffle8(inner->lower_upper_z,orderZ) + negOrg.z) * signedRdir.z;
        const ssef entryExit = max(slabX,slabY,slabZ,rayRange) ^ signMask;
        const sseb childHit = entryExit <= shuffle8(entryExit,swapHalves);
        const bool hitsFirst  = childHit[0] != 0;
        const bool hitsSecond = childHit[1] != 0;

        /*! both boxes hit: child 0 is deferred and child 1 visited first;
         *  no distance ordering is applied here */
        if (likely(hitsFirst && hitsSecond)) {
          *top = inner->child[0];
          ++top;
          cur = inner->child[1];
        }
        /*! a single box hit: follow it */
        else if (hitsFirst)  cur = inner->child[0];
        else if (hitsSecond) cur = inner->child[1];
        /*! nothing hit: take the next deferred subtree */
        else goto next_node;
      }

      /*! leaf: the first triangle reported as occluding ends the query */
      {
        STAT3(shadow.trav_leaves,1,1,1);
        size_t triNum; Triangle* tris = (Triangle*) cur->leaf(triNum);
        for (size_t k=0; k<triNum; k++) {
          if (TriangleIntersector::occluded(ray,tris[k],bvh->vertices)) {
            AVX_ZERO_UPPER();
            return true;
          }
        }
      }

      /*! continue with the most recently deferred subtree, if any */
next_node:
      if (unlikely(top == deferred)) break;
      --top;
      cur = *top;
    }
    AVX_ZERO_UPPER();
    return false;
  }