Example #1
0
 // Element-wise assignment of rhs into lhs using a single index.
 //
 // lhs: destination; written through lhs.put(index, value).
 // rhs: source expression; read through rhs.get(index).
 static void exec(LHS &lhs, RHS const &rhs)
 {
     // Run expr::evaluate on the right-hand side before any element is read.
     expr::evaluate(rhs);

     // Number of elements to copy, as reported by the destination
     // (presumably the extent of dimension 0 of a 1-D block -- confirm
     // against the block interface).
     length_type const count = lhs.size(1, 0);

     index_type pos = 0;
     while (pos < count)
     {
         lhs.put(pos, rhs.get(pos));
         ++pos;
     }
 }
  // Assign rhs to lhs in two phases: whole SIMD vectors are moved through
  // the proxy load/store interface, then the elements that do not fill a
  // complete vector are copied one at a time with get/put.
  //
  // lhs: destination; accessed through an Ext_data object opened with
  //      SYNC_OUT for the vector phase and through lhs.put() for the tail.
  // rhs: source expression; accessed through a proxy built by
  //      simd::Proxy_factory for the vector phase and through rhs.get()
  //      for the tail.
  static void exec(LHS &lhs, RHS const &rhs)
  {
    using namespace impl;
    typedef typename simd::LValue_access_traits<typename LHS::value_type>
      Store_traits;
    typedef typename simd::Proxy_factory<RHS, false>::access_traits
      Load_traits;

    // Number of elements handled by one vector load/store.
    length_type const vec_size =
      simd::Simd_traits<typename LHS::value_type>::vec_size;

    Ext_data<LHS, layout_type> dda(lhs, SYNC_OUT);

    simd::Proxy<Store_traits, true>  lp(dda.data());
    simd::Proxy<Load_traits, false>  rp(simd::Proxy_factory<RHS, false>::create(rhs));

    length_type const total = dda.size(0);
    length_type       left  = total;   // elements not yet written

    // Vector phase, driven through the proxy interface. Original author's
    // note: this form generated the best code with gcc 3.4, and with
    // gcc 4.1 the difference to an alternative variant (not visible in
    // this excerpt) was negligible.
    for (; left >= vec_size; left -= vec_size)
    {
      lp.store(rp.load());
      lp.increment();
      rp.increment();
    }

    // Scalar phase: plain loop fusion over the trailing `left` elements.
    index_type tail = total - left;
    while (tail != total)
    {
      lhs.put(tail, rhs.get(tail));
      ++tail;
    }
  }
Example #3
0
    // Element-wise assignment of rhs into lhs using two indices.
    //
    // lhs: destination; written through lhs.put(i, j, value).
    // rhs: source expression; read through rhs.get(i, j).
    //
    // The second index is advanced by the outer loop and the first index
    // by the inner loop.
    static void exec(LHS &lhs, RHS const &rhs)
    {
        // Run expr::evaluate on the right-hand side before reading from it.
        expr::evaluate(rhs);

        // Both extents are queried up front, before any element is touched.
        length_type const extent0 = lhs.size(2, 0);
        length_type const extent1 = lhs.size(2, 1);

        index_type outer = 0;
        while (outer < extent1)
        {
            index_type inner = 0;
            while (inner < extent0)
            {
                lhs.put(inner, outer, rhs.get(inner, outer));
                ++inner;
            }
            ++outer;
        }
    }
Example #4
0
    // Element-wise assignment of rhs into lhs using three indices.
    //
    // lhs: destination; written through lhs.put(i, j, k, value).
    // rhs: source expression; read through rhs.get(i, j, k).
    //
    // Loop nest, outermost to innermost: first index, third index,
    // second index.
    static void exec(LHS &lhs, RHS const &rhs)
    {
        // Run expr::evaluate on the right-hand side before reading from it.
        expr::evaluate(rhs);

        // All three extents are queried up front.
        length_type const extent0 = lhs.size(3, 0);
        length_type const extent1 = lhs.size(3, 1);
        length_type const extent2 = lhs.size(3, 2);

        index_type a = 0;
        while (a < extent0)
        {
            index_type c = 0;
            while (c < extent2)
            {
                index_type b = 0;
                while (b < extent1)
                {
                    lhs.put(a, b, c, rhs.get(a, b, c));
                    ++b;
                }
                ++c;
            }
            ++a;
        }
    }