Пример #1
0
    /**
     * Checks that a Vec can be loaded from an array of MemT entries, then
     * continues with the next memory type from SupportedConversions<VecT, MemT>.
     *
     * Three load paths are exercised over every start offset i of a 128-entry
     * buffer: the load constructor, Vec::load(), and the load constructor
     * combined with Vc::Streaming. In each pass the flag argument depends on i:
     *   - i is a multiple of 2 * Vec::Size: no alignment flag is passed,
     *   - i is a multiple of Vec::Size:     Vc::Aligned,
     *   - otherwise:                        Vc::Unaligned.
     * After every load each lane is compared against the scalar conversion
     * static_cast<VecT>(data[i + j]).
     */
    static void test() {
        typedef typename Vec::EntryType VecT;
        MemT *data = Vc::malloc<MemT, Vc::AlignOnCacheline>(128);
        // NOTE(review): i is a size_t, so `i - 64` wraps around for i < 64
        // before it is converted to MemT. The comparisons below stay consistent
        // because they convert the stored value again, but confirm whether
        // negative inputs were intended for floating-point MemT.
        for (size_t i = 0; i < 128; ++i) {
            data[i] = static_cast<MemT>(i - 64);
        }

        // Pass 1: converting load through the constructor.
        for (size_t i = 0; i < 128 - Vec::Size + 1; ++i) {
            Vec v;
            if (i % (2 * Vec::Size) == 0) {
                v = Vec(&data[i]);
            } else if (i % Vec::Size == 0) {
                v = Vec(&data[i], Vc::Aligned);
            } else {
                v = Vec(&data[i], Vc::Unaligned);
            }
            for (size_t j = 0; j < Vec::Size; ++j) {
                COMPARE(v[j], static_cast<VecT>(data[i + j])) << " " << TypeInfo<MemT>::string();
            }
        }
        // Pass 2: converting load through Vec::load().
        for (size_t i = 0; i < 128 - Vec::Size + 1; ++i) {
            Vec v;
            if (i % (2 * Vec::Size) == 0) {
                v.load(&data[i]);
            } else if (i % Vec::Size == 0) {
                v.load(&data[i], Vc::Aligned);
            } else {
                v.load(&data[i], Vc::Unaligned);
            }
            for (size_t j = 0; j < Vec::Size; ++j) {
                COMPARE(v[j], static_cast<VecT>(data[i + j])) << " " << TypeInfo<MemT>::string();
            }
        }
        // Pass 3: converting load through the constructor with Vc::Streaming.
        for (size_t i = 0; i < 128 - Vec::Size + 1; ++i) {
            Vec v;
            if (i % (2 * Vec::Size) == 0) {
                v = Vec(&data[i], Vc::Streaming);
            } else if (i % Vec::Size == 0) {
                v = Vec(&data[i], Vc::Streaming | Vc::Aligned);
            } else {
                v = Vec(&data[i], Vc::Streaming | Vc::Unaligned);
            }
            for (size_t j = 0; j < Vec::Size; ++j) {
                COMPARE(v[j], static_cast<VecT>(data[i + j])) << " " << TypeInfo<MemT>::string();
            }
        }

        // Release the buffer obtained from Vc::malloc. Without this every step
        // of the recursion below would leak its own 128-entry allocation.
        Vc::free(data);

        ADD_PASS() << "loadCvt: load " << TypeInfo<MemT>::string() << "* as " << TypeInfo<Vec>::string();
        LoadCvt<Vec, typename SupportedConversions<VecT, MemT>::Next>::test();
    }
Пример #2
0
template<typename Vec> void loadArray()
{
    typedef typename Vec::EntryType T;
    typedef typename Vec::IndexType I;

    enum loadArrayEnum { count = 256 * 1024 / sizeof(T) };
    Vc::Memory<Vec, count> array;
    for (int i = 0; i < count; ++i) {
        array[i] = i;
    }

    const I indexesFromZero(IndexesFromZero);

    const Vec offsets(indexesFromZero);
    for (int i = 0; i < count; i += Vec::Size) {
        const T *const addr = &array[i];
        Vec ii(i);
        ii += offsets;

        Vec a(addr);
        COMPARE(a, ii);

        Vec b = Vec::Zero();
        b.load(addr);
        COMPARE(b, ii);
    }
}
Пример #3
0
/**
 * Same check as a full-vector load test, for a buffer of loadArrayShortCount
 * entries: every vector is read with the load constructor and with
 * Vec::load(), and both results are compared with the expected contents.
 *
 * Entry i of the buffer is assigned i; the per-lane offsets are obtained by
 * converting ushort_v::IndexesFromZero() to Vec.
 */
template<typename Vec> void loadArrayShort()
{
    typedef typename Vec::EntryType T;

    Vc::Memory<Vec, loadArrayShortCount> array;
    int fill = 0;
    while (fill < loadArrayShortCount) {
        array[fill] = fill;
        ++fill;
    }

    const Vec laneOffsets = static_cast<Vec>(ushort_v::IndexesFromZero());

    for (int base = 0; base < loadArrayShortCount; base += Vec::Size) {
        // Expected contents of the vector that starts at entry `base`.
        Vec ii(base);
        ii += laneOffsets;

        const T *const src = &array[base];

        // Load through the constructor.
        Vec a(src);
        COMPARE(a, ii);

        // Load into an already initialised (zeroed) vector.
        Vec b = Vec::Zero();
        b.load(src);
        COMPARE(b, ii);
    }
}
Пример #4
0
/**
 * Make sure the quartic polynomial of cell k is monotonic.
 *
 * The slope of the reconstruction is evaluated at the roots of its second
 * derivative (the inflexion points). If, in any vector lane, a slope at a
 * root inside [0,1] has the opposite sign to the sum of the two PLM slopes,
 * the edge values and edge derivatives of that lane are recomputed by the
 * scalar fix-up loop below ("collapse" to the left or right edge).
 *
 * @param values  Array of cell values; entries k-1, k and k+1 are read and
 *                none of them is modified.
 * @param k       Index of the cell to filter; k-1 and k+1 must be valid.
 * @param fv_l    In/out. Presumably the left face value of cell k (name-based,
 *                confirm against the caller).
 * @param fv_r    In/out. Presumably the right face value of cell k.
 * @param fd_l    In/out. Presumably the left face derivative of cell k.
 * @param fd_r    In/out. Presumably the right face derivative of cell k.
 *
 * The four in/out vectors are only written back when at least one lane needs
 * fixing; lanes that do not need it are reloaded with their stored values.
 */
inline void filter_pqm_monotonicity(Vec *values, uint k, Vec &fv_l, Vec &fv_r, Vec &fd_l, Vec &fd_r){
   /*second derivative coefficients, eq 23 in white et al.*/
   Vec b0 =   60.0 * values[k] - 24.0 * fv_r - 36.0 * fv_l + 3.0 * (fd_r - 3.0 * fd_l);
   Vec b1 = -360.0 * values[k] + 36.0 * fd_l - 24.0 * fd_r + 168.0 * fv_r + 192.0 * fv_l;
   Vec b2 =  360.0 * values[k] + 30.0 * (fd_r - fd_l) - 180.0 * (fv_l + fv_r);
   /*Discriminant of b0 + b1 * x + b2 * x^2, computed once and used both for
     the sign test and for the square root.*/
   const Vec discriminant = b1 * b1 - 4 * b0 * b2;
   /*Compute the sqrt value used for the roots. Where the discriminant is
     negative there is no real root; a placeholder is used instead of the
     square root so that root1 evaluates to 100, well outside [0,1]. The
     max() keeps sqrt from ever seeing a negative argument.
     NOTE(review): with the placeholder root2 evaluates to -100 - b1 / b2,
     which is outside [0,1] unless b1 / b2 lies in [-101,-100] - confirm
     this is acceptable.*/
   const Vec sqrt_val = select(discriminant < 0.0,
                               b1 + 200.0 * b2,
                               sqrt(max(discriminant, 0.0)));
   //compute roots. Division is safe with vectorclass (=inf)
   const Vec root1 = (-b1 + sqrt_val) / (2 * b2);
   const Vec root2 = (-b1 - sqrt_val) / (2 * b2);

   /*PLM slope, MC limiter*/
   Vec plm_slope_l = 2.0 * (values[k] - values[k - 1]);
   Vec plm_slope_r = 2.0 * (values[k + 1] - values[k]);
   Vec slope_sign = plm_slope_l + plm_slope_r; //it also has some magnitude, but we will only use its sign.
   /*first derivative coefficients: slope(x) = c0 + c1 * x + c2 * x^2 + c3 * x^3*/
   const Vec c0 = fd_l;
   const Vec c1 = b0;
   const Vec c2 = b1 / 2.0;
   const Vec c3 = b2 / 3.0;
   //compute both slopes at inflexion points, at least one of these
   //is with [0..1]. If the root is not in this range, we
   //simplify later if statements by setting it to the plm slope
   //sign
   Vec root1_slope = select(root1 >= 0.0 && root1 <= 1.0,
                             c0  + root1 * ( c1 + root1 * (c2 + root1 * c3 ) ),
                             slope_sign);
   Vec root2_slope = select(root2 >= 0.0 && root2 <= 1.0,
                            c0  + root2 * ( c1 + root2 * (c2 + root2 * c3 ) ),
                            slope_sign);
   //a lane needs fixing when a slope at an inflexion point has the
   //opposite sign to slope_sign
   Vecb fixInflexion = root1_slope * slope_sign < 0.0 || root2_slope * slope_sign < 0.0;
   if (horizontal_or (fixInflexion) ){ 
      //copy the vectors into plain arrays so the per-lane fix-up can read
      //and write single elements
      Realv valuesa[VECL];
      Realv fva_l[VECL];
      Realv fva_r[VECL];
      Realv fda_l[VECL];
      Realv fda_r[VECL];
      Realv slope_signa[VECL];
      values[k].store(valuesa);
      fv_l.store(fva_l);
      fd_l.store(fda_l);
      fv_r.store(fva_r);
      fd_r.store(fda_r);
      slope_sign.store(slope_signa);
      
      //todo store and then load data to avoid inserts (is it beneficial...?)
      
//serialized the handling of inflexion points, these do not happen for smooth regions
      for(uint i = 0;i < VECL; i++) {
         if(fixInflexion[i]){
            //need to collapse, at least one inflexion point has wrong
            //sign.
            if(fabs(plm_slope_l[i]) <= fabs(plm_slope_r[i])) {
               //collapse to left edge (eq 21)
               fda_l[i] =  1.0 / 3.0 * ( 10 * valuesa[i] - 2.0 * fva_r[i] - 8.0 * fva_l[i]);
               fda_r[i] =  -10.0 * valuesa[i] + 6.0 * fva_r[i] + 4.0 * fva_l[i];
               //check if PLM slope is consistent (eq 28 & 29)
               if (slope_signa[i] * fda_l[i] < 0) {
                  fda_l[i] =  0;
                  fva_r[i] =  5 * valuesa[i] - 4 * fva_l[i];
                  fda_r[i] =  20 * (valuesa[i] - fva_l[i]);
               }
               else if (slope_signa[i] * fda_r[i] < 0) {
                  fda_r[i] =  0;
                  fva_l[i] =  0.5 * (5 * valuesa[i] - 3 * fva_r[i]);
                  fda_l[i] =  10.0 / 3.0 * (-valuesa[i] + fva_r[i]);
               }
            }
            else {
               //collapse to right edge (eq 21)
               fda_l[i] =  10.0 * valuesa[i] - 6.0 * fva_l[i] - 4.0 * fva_r[i];
               fda_r[i] =  1.0 / 3.0 * ( - 10.0 * valuesa[i] + 2 * fva_l[i] + 8 * fva_r[i]);
               //check if PLM slope is consistent (eq 28 & 29)
               if (slope_signa[i] * fda_l[i] < 0) {
                  fda_l[i] =  0;
                  fva_r[i] =  0.5 * ( 5 * valuesa[i] - 3 * fva_l[i]);
                  fda_r[i] =  10.0 / 3.0 * (valuesa[i] - fva_l[i]);
               }
               else if (slope_signa[i] * fda_r[i] < 0) {
                  fda_r[i] =  0;
                  fva_l[i] =  5 * valuesa[i] - 4 * fva_r[i];
                  fda_l[i] =  20.0 * ( - valuesa[i] + fva_r[i]);
               }
            }
         }
      }      
      //write the (possibly modified) edge values and derivatives back
      fv_l.load(fva_l);
      fd_l.load(fda_l);
      fv_r.load(fva_r);
      fd_r.load(fda_r);
   }
}