예제 #1
0
파일: mvmul.hpp 프로젝트: tjolsen/YAFEL
/**
 * Matrix-vector product: returns b with b(i) = sum_j A(i,j) * x(j).
 *
 * @param A  matrix expression providing rows(), cols() and A(i,j)
 * @param x  vector expression providing size() and x(j)
 * @return   a Vector<dataType> with A.rows() entries
 * @throws std::length_error if A.cols() != x.size(); the check is compiled
 *         out when _OPTIMIZED is defined.
 */
Vector<dataType> mvmul(const MatrixExpression<MT,dataType> &A,
                       const VectorExpression<VT,dataType> &x) {

#ifndef _OPTIMIZED
  // Reject operands whose shapes do not line up.
  if(A.cols() != x.size()) {
    throw std::length_error("mvmul dimension mismatch");
  }
#endif

  Vector<dataType> result(A.rows());

  // Each row writes only its own output entry, so rows can be distributed
  // across threads when OpenMP is enabled.
#ifdef _OPENMP
  #pragma omp parallel for
#endif
  for(std::size_t row=0; row<A.rows(); ++row) {
    // Largest multiple of four that does not exceed the column count.
    auto unrolledEnd = 4*(A.cols()/4);

    dataType acc(0);
    std::size_t col = 0;

    // Main part: four columns per pass, added in ascending column order.
    for(; col<unrolledEnd; col += 4) {
      acc += A(row,col+0)*x(col+0);
      acc += A(row,col+1)*x(col+1);
      acc += A(row,col+2)*x(col+2);
      acc += A(row,col+3)*x(col+3);
    }

    // Tail: the up-to-three columns left over after the unrolled part.
    for(; col<A.cols(); ++col) {
      acc += A(row,col)*x(col);
    }

    result(row) = acc;
  }

  return result;
}
예제 #2
0
파일: Containers.hpp 프로젝트: amart/ATL
    /**
     * Adds up every element of a vector expression.
     *
     * @param expr        expression to reduce; expr.Size(0) gives the element
     *                    count and expr(i) the i-th element
     * @param concurrent  when true, the index range is split into one slice per
     *                    hardware thread and each slice is handed to SumThread
     *                    on its own std::thread
     * @return the sum of all elements; a value-initialized BASE_TYPE when the
     *         expression is empty
     *
     * NOTE(review): SumThread is defined elsewhere; this code assumes it stores
     * the sum of expr over [begin, end) into its first argument - confirm.
     */
    inline const typename A::BASE_TYPE Sum(const VectorExpression<T2, A> &expr, bool concurrent = false) {
        // Value-initialize the accumulator: the concurrent branch only ever
        // does `ret += ...`, which would otherwise read an indeterminate value
        // for arithmetic BASE_TYPEs.
        typename A::BASE_TYPE ret = typename A::BASE_TYPE();

        if (concurrent) {

            // hardware_concurrency() may return 0 when the value is not
            // computable; fall back to one worker so the division below is
            // well defined and the data is still summed.
            const unsigned int detected = std::thread::hardware_concurrency();
            const int workers = static_cast<int>(detected == 0 ? 1u : detected);

            // Elements per slice; the last worker also takes the remainder.
            int range = expr.Size(0) / workers;
            std::vector<std::thread> pool;
            pool.reserve(workers);
            // One value-initialized partial result per worker.
            std::vector<typename A::BASE_TYPE > temp(workers);

            for (int i = 0; i < workers; i++) {
                if (i < (workers - 1)) {
                    pool.push_back(std::thread(SumThread<T2, A>, std::ref(temp[i]), i*range, (i + 1) * range, std::ref(expr)));
                } else {
                    // Final slice runs to the end of the expression.
                    pool.push_back(std::thread(SumThread<T2, A>, std::ref(temp[i]), i*range, expr.Size(0), std::ref(expr)));
                }
            }

            // Wait for every worker before reading the partial results.
            for (std::thread &worker : pool) {
                worker.join();
            }

            for (int i = 0; i < workers; i++) {
                ret += temp[i];
            }

        } else {

            size_t s = expr.Size(0);

            // Nothing to add; also avoids the unsigned underflow of `s - 1`
            // and the out-of-range read of expr(0) below.
            if (s == 0) {
                return ret;
            }

            // Element 0 seeds the sum and the rest is consumed in pairs.
            // `end` is the largest odd index <= s, so the pair loop covers
            // indices 1 .. end-1 and at most one trailing element remains.
            size_t end = (((s - 1UL) & size_t(-2)) + 1UL);
            ret = expr(0);
            for (size_t i = 1UL; i < end; i += 2UL) {
                ret += expr(i) + expr(i + 1);
            }

            if (end < s) {
                ret += expr(end);
            }
        }
        return ret;
    }
예제 #3
0
    /**
     * Dot product of two vector expressions over their common index range.
     *
     * Only indices present in both operands contribute: the loop runs from the
     * larger of the two IndexMin() values to the smaller of the two IndexMax()
     * values, inclusive.
     *
     * @param a  left-hand vector expression
     * @param b  right-hand vector expression
     * @return   sum of a(i) * b(i) over the shared range; a value-initialized
     *           result when the ranges do not overlap
     */
    inline const typename atl::PromoteType<typename LHS::RET_TYPE, typename RHS::RET_TYPE >::return_type Dot(const VectorExpression<typename LHS::RET_TYPE, LHS>& a,
            const VectorExpression<typename RHS::RET_TYPE, RHS>& b) {
        typedef typename atl::PromoteType<typename LHS::BASE_TYPE, typename RHS::BASE_TYPE>::return_type R_TYPE;

        // Value-initialize: the accumulator is only ever updated with `+=`,
        // so an arithmetic R_TYPE must start from zero rather than from an
        // indeterminate value.
        R_TYPE sum = R_TYPE();

        int lmin = a.IndexMin();
        int lmax = a.IndexMax();
        int rmin = b.IndexMin();
        int rmax = b.IndexMax();

        // Intersection of the two index ranges.
        int min = std::max(lmin, rmin);
        int max = std::min(lmax, rmax);

        for (int i = min; i <= max; i++) {
            sum += a(i) * b(i);
        }
        return sum;
    }
예제 #4
0
파일: Vector.hpp 프로젝트: amart/ATL
        /**
         * Constructs the vector by evaluating a vector expression.
         *
         * The length is taken from expr.Size(0); storage is resized to that
         * length and each slot is filled with expr(i).
         *
         * @param expr  expression whose elements are copied into this vector
         */
        Vector(const VectorExpression<T2, A> &expr)
        : isize(expr.Size(0)) {

            data_m.resize(isize);

            // Evaluate the expression one element at a time, in index order.
            int idx = 0;
            while (idx < isize) {
                data_m[idx] = expr(idx);
                ++idx;
            }

        }
예제 #5
0
파일: lazy.cpp 프로젝트: fapablazacl/cpp
 /// Materializes a vector expression: allocates other.size() slots and
 /// stores other[k] into each one in index order.
 Vector(VectorExpression<E> const& other) : values(other.size()) {
     std::size_t k = 0;
     while (k < other.size()) {
         values[k] = other[k];
         ++k;
     }
 }