/*! Occlusion test for a packet of 16 rays, processed one ray at a time.
 *
 *  Every lane whose bit is set in valid_i is converted from the SOA packet
 *  layout into a single AOS ray and handed to
 *  BVH4AOSTriangle1Intersector1::occluded1.
 *
 *  \param This    intersector instance; only This->bvh is read
 *  \param ray     the ray packet (org, dir, tnear, tfar are read)
 *  \param valid_i mask selecting the lanes to test
 *  \return mask of lanes that were selected by valid_i and for which
 *          occluded1 reported an occlusion
 */
__mmask BVH4AOSTriangle1Intersector16Single::occluded(const BVH4AOSTriangle1Intersector16Single* This, Ray16& ray, const __mmask valid_i)
  {
    /* pointers to node array and triangle array */
    const mic_m valid = valid_i;
    const BVH4AOS* bvh = This->bvh;
    const Node*     const nodes     = (const Node*    ) bvh->nodePtr();
    const Triangle1 *const triangles = (const Triangle1*) bvh->triPtr();

    /* The reciprocal direction is computed for the whole packet at once and
       nothing inside the loop below writes to 'ray', so it is evaluated a
       single time here instead of once per active lane. */
    const mic3f ray_rdir = rcp_safe(ray.dir);

    /* per-lane flag: stays -1 until the lane is found to be occluded, then 0 */
    mic_i not_occluded = mic_i::minus_one();
    long rayIndex = -1;
    /* walk over the set bits of 'valid'; bsf64 yields MIC_NO_BIT_SET_64 when
       no further bit is left */
    while((rayIndex = bsf64(rayIndex,valid)) != MIC_NO_BIT_SET_64)      
    {       
      // === TODO: precompute SOAtoAOS transformation, load with 4x broadcast
      const mic_f org_xyz      = SOAtoAOS_4f(rayIndex,ray.org.x,ray.org.y,ray.org.z);
      const mic_f dir_xyz      = SOAtoAOS_4f(rayIndex,ray.dir.x,ray.dir.y,ray.dir.z);
      const mic_f rdir_xyz     = SOAtoAOS_4f(rayIndex,ray_rdir.x,ray_rdir.y,ray_rdir.z);
      const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
      const mic_f min_dist_xyz = upconv1f(&ray.tnear[rayIndex]); 
      const mic_f max_dist_xyz = upconv1f(&ray.tfar[rayIndex]); 
      if (BVH4AOSTriangle1Intersector1::occluded1(bvh,nodes,triangles,bvh->root,rayIndex,org_xyz,dir_xyz,rdir_xyz,org_rdir_xyz,min_dist_xyz,max_dist_xyz))
        not_occluded[rayIndex] = 0;
    }
    /* lanes whose flag was cleared are the occluded ones */
    return valid & eq(not_occluded,mic_i::zero());
  }
  void BVH4AOSTriangle1Intersector16Single::intersect(const BVH4AOSTriangle1Intersector16Single* This, Ray16& ray, const __mmask valid_i)
  {
    /* pointers to node array and triangle array */
    const mic_m valid = valid_i;
    const BVH4AOS* bvh = This->bvh;
    const Node*     const nodes     = (const Node*    ) bvh->nodePtr();
    const Triangle1 *const triangles = (const Triangle1*) bvh->triPtr();

    long rayIndex = -1;
    while((rayIndex = bsf64(rayIndex,valid)) != MIC_NO_BIT_SET_64)      
    {       
      const mic3f ray_rdir     = rcp_safe(ray.dir);
      const mic_f org_xyz      = SOAtoAOS_4f(rayIndex,ray.org.x,ray.org.y,ray.org.z);
      const mic_f dir_xyz      = SOAtoAOS_4f(rayIndex,ray.dir.x,ray.dir.y,ray.dir.z);
      const mic_f rdir_xyz     = SOAtoAOS_4f(rayIndex,ray_rdir.x,ray_rdir.y,ray_rdir.z);
      const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
      const mic_f min_dist_xyz = upconv1f(&ray.tnear[rayIndex]); 
      const mic_f max_dist_xyz = upconv1f(&ray.tfar[rayIndex]); 
      BVH4AOSTriangle1Intersector1::intersect1(bvh,nodes,triangles,bvh->root,rayIndex,org_xyz,dir_xyz,rdir_xyz,org_rdir_xyz,min_dist_xyz,max_dist_xyz,ray);
    }
  }
Exemplo n.º 3
0
/** Add a write-combine (WC) entry for a memory block to the MTRR table,
    rebuilding the table when the block cannot simply be appended.

    Fast path: if the block is neither reported by is_included() nor by
    is_intersection() and a register is free, the WC entry is written and the
    function returns immediately.

    Slow path: entries reaching above 4Gb are saved (and truncated/cleared),
    large UC blocks are removed, the WB area below the lowest large UC block is
    re-split into power-of-two blocks, the WC entry is added and, if registers
    remain, saved above-4Gb entries are put back.

    @param wc_addr   start address of the block to mark as WC
    @param wc_len    length of the block
    @param memlimit  [out] set to 0 on entry; when the WB list is rebuilt it
                     receives the end of the rebuilt WB area shifted right
                     by 20 bits
    @return 0 on success, otherwise one of the OPTERR_* codes. Note that the
            mtrr table may already have been modified when an error is
            returned from the slow path. */
int mtrropt(u64t wc_addr, u64t wc_len, u32t *memlimit) {
   u32t        reg;
   int          ii, 
            sv4idx = 0;
   /* entries located above 4Gb which are removed and restored at the end.
      NOTE(review): sv4idx is never checked against the array size (16) -
      confirm that "regs" can never exceed it */
   mtrrentry save4[16];
   memset(&save4,0,sizeof(save4));
   *memlimit = 0;

   // fast path: no conflict reported and a free register is available
   if (is_included(wc_addr,wc_len)<0 && is_intersection(wc_addr, wc_len)<0 &&
      is_regavail(&reg))
   {
      mtrr[reg].start = wc_addr;
      mtrr[reg].len   = wc_len;
      mtrr[reg].cache = MTRRF_WC;
      mtrr[reg].on    = 1;
      return 0;
   }
   // video memory is not in the 4th GB
   if (wc_addr<_3GbLL || wc_addr+wc_len>_4GbLL) return OPTERR_VIDMEM3GB;
   /* turn off previous write combine on the same memory,
      but leave this block to catch low UC border successfully */
   ii = is_include(wc_addr, wc_len);
   if (ii>=0 && mtrr[ii].cache==MTRRF_WC) mtrr[ii].cache=MTRRF_UC;
   // only WB and UC allowed in first 4Gb
   for (ii=0; ii<regs; ii++) 
      if (mtrr[ii].on && mtrr[ii].cache!=MTRRF_UC && mtrr[ii].cache!=MTRRF_WB
         && mtrr[ii].start<_4GbLL) return OPTERR_UNKCT;
   // is block intersected with someone?
   ii = is_intersection(wc_addr, wc_len);
   if (ii>=0) return OPTERR_INTERSECT;
   // remove/truncate all above 4Gb (but save it)
   for (ii=0; ii<regs; ii++)
      if (mtrr[ii].on)
         if (mtrr[ii].start<_4GbLL && mtrr[ii].start+mtrr[ii].len>_4GbLL) {
            // entry crosses the 4Gb border: keep the lower part only
            u64t newlen = _4GbLL - mtrr[ii].start, 
                 remain = mtrr[ii].len - newlen;
            if (!is_power2(newlen)) return OPTERR_SPLIT4GB;
            mtrr[ii].len = newlen;
            // save block
            if (is_power2(remain)) {
               save4[sv4idx].start = _4GbLL;
               save4[sv4idx].len   = remain;
               save4[sv4idx].cache = mtrr[ii].cache;
               sv4idx++;
            } else
            if (is_power2(remain/3)) {
               /* NOTE(review): empty placeholder - nothing is saved for this
                  case, the upper part of the entry is dropped */
            }
         } else
         if (mtrr[ii].start>=_4GbLL || mtrr[ii].start+mtrr[ii].len>_4GbLL) {
            // entry lies above 4Gb: save it and free the register
            save4[sv4idx].start = mtrr[ii].start;
            save4[sv4idx].len   = mtrr[ii].len;
            save4[sv4idx].cache = mtrr[ii].cache;
            sv4idx++;
            clearreg(ii);
         }
   u64t  wbend = 0,
       ucstart = FFFF64;
   // searching for upper WB border
   for (ii=0; ii<regs; ii++)
      if (mtrr[ii].on)
         if (mtrr[ii].cache==MTRRF_WB)
            if (mtrr[ii].start+mtrr[ii].len > wbend)
               wbend = mtrr[ii].start+mtrr[ii].len;
   // searching for lower UC border (but ignore small blocks)
   for (ii=0; ii<regs; ii++)
      if (mtrr[ii].on)
         if (mtrr[ii].cache==MTRRF_UC) {
            int pwr = bsf64(mtrr[ii].len);
            // only UC blocks with pwr>=27 take part in the border search
            if (pwr>=27) {
               if (ucstart>mtrr[ii].start) ucstart = mtrr[ii].start;
               clearreg(ii);
            }
         }
   // pass #2 - removing small blocks above selected border
   for (ii=0; ii<regs; ii++)
      if (mtrr[ii].on)
         if (mtrr[ii].cache==MTRRF_UC && ucstart<=mtrr[ii].start)
            clearreg(ii);
   // if no UC entries - use the end of WB as border
   if (ucstart>wbend) ucstart = wbend;
   // this can occur on small video memory size (<128Mb)
   if (wc_addr<ucstart) return OPTERR_BELOWUC;
   // build new WB list
   if (ucstart<wbend) {
      if (ucstart<_1GbLL) return OPTERR_LOWUC;

      // drop all existing WB entries, they are rebuilt below
      for (ii=0; ii<regs; ii++)
         if (mtrr[ii].on)
            if (mtrr[ii].cache==MTRRF_WB) clearreg(ii);

      // registers left after reserving the saved entries and one more register
      int regsfree = regsavail() - sv4idx - 1;
      log_it(2, "regs free: %i \n", regsfree);
      // force 3 registers (some memory above 4Gb can be lost)
      if (regsfree<3) regsfree = 3;

      // split memory to list
      u64t nextpos = 0;
      ii = 0;
      for (u64t size=_2GbLL; size>=_64MbLL; size>>=1) {
         if (ucstart>=size) {
            if (!is_regavail(&reg)) return OPTERR_NOREG;
            mtrr[reg].start = nextpos;
            nextpos += (mtrr[reg].len = size);
            mtrr[reg].cache = MTRRF_WB;
            mtrr[reg].on    = 1;
            ucstart -= size;
            // stop as soon as regsfree registers were used
            if (++ii==regsfree) break;
         }
      }
      // save memlimit value
      *memlimit = nextpos>>20;
      /** and again removing small blocks above selected border...
         the sum of the split blocks can be smaller than the previously
         selected UC border and some blocks can be cleared here */
      for (ii=0; ii<regs; ii++)
         if (mtrr[ii].on)
            if (mtrr[ii].cache==MTRRF_UC && nextpos<=mtrr[ii].start)
               clearreg(ii);
   }
   // final check 
   if (is_included(wc_addr,wc_len)>=0 || is_intersection(wc_addr,wc_len)>=0 ||
      is_include(wc_addr,wc_len)>=0) return OPTERR_OPTERR;
   // add entry
   /* NOTE(review): when no register is free here the WC entry is silently
      not added and 0 is still returned - confirm this is intended */
   if (is_regavail(&reg)) {
      mtrr[reg].start = wc_addr;
      mtrr[reg].len   = wc_len;
      mtrr[reg].cache = MTRRF_WC;
      mtrr[reg].on    = 1;
   }
   // restore some of above 4Gb memory blocks
   if (sv4idx && regsavail()>0) {
      for (ii=0; ii<sv4idx; ii++) {
         if (!is_regavail(&reg)) break;
         mtrr[reg].start = save4[ii].start;
         mtrr[reg].len   = save4[ii].len;
         mtrr[reg].cache = save4[ii].cache;
         mtrr[reg].on    = 1;
      }
      // check lost items for included UC entries
      /* ii now points to the first saved entry which was NOT restored, so it
         indexes save4[], not mtrr[] - the cache type must be taken from the
         saved entry as well */
      while (ii<sv4idx) {
         if (save4[ii].cache==MTRRF_UC) {
            int idx = is_included(save4[ii].start,save4[ii].len);
            if (idx<0) idx = is_intersection(save4[ii].start,save4[ii].len);
            // check it multiple times (for intersection)
            if (idx>=0) { clearreg(idx); continue; }
         }
         ii++;
      }
   }
   return 0;
}
Exemplo n.º 4
0
/// Check that exactly one bit is set in @p length.
/// @return 1 when shifting 1 left by bsf64(length) gives length back,
///         0 otherwise (including length == 0).
static int is_power2(u64t length) {
   // zero is never accepted (and bsf64 is not called for it)
   if (!length) return 0;
   // presumably bsf64 yields the index of the lowest set bit - verify
   const u64t single_bit = (u64t)1 << bsf64(length);
   return single_bit==length ? 1 : 0;
}