// Matrix-vector product: returns the vector b with b(i) = sum over j of A(i,j) * x(j).
//
// Unless _OPTIMIZED is defined, throws std::length_error when the number of
// columns of A differs from the size of x. When _OPENMP is defined the rows
// are distributed over threads with an OpenMP parallel for.
Vector<dataType> mvmul(const MatrixExpression<MT,dataType> &A, const VectorExpression<VT,dataType> &x)
{
    const auto colCount = A.cols();

#ifndef _OPTIMIZED
    // Reject operands whose inner dimensions do not agree.
    if (colCount != x.size())
    {
        throw std::length_error("mvmul dimension mismatch");
    }
#endif

    const auto rowCount = A.rows();
    Vector<dataType> b(rowCount);

    // Largest multiple of four that does not exceed the column count; the
    // columns below this bound are processed four at a time.
    const auto unrolledEnd = 4 * (colCount / 4);

#ifdef _OPENMP
    #pragma omp parallel for
#endif
    for (std::size_t row = 0; row < rowCount; ++row)
    {
        dataType acc(0);

        // Manually unrolled main part of the row/vector dot product.
        std::size_t col = 0;
        for (; col < unrolledEnd; col += 4)
        {
            acc += A(row, col + 0) * x(col + 0);
            acc += A(row, col + 1) * x(col + 1);
            acc += A(row, col + 2) * x(col + 2);
            acc += A(row, col + 3) * x(col + 3);
        }

        // Remaining (at most three) trailing columns.
        for (col = unrolledEnd; col < colCount; ++col)
        {
            acc += A(row, col) * x(col);
        }

        b(row) = acc;
    }

    return b;
}
inline const typename A::BASE_TYPE Sum(const VectorExpression<T2, A> &expr, bool concurrent = false) { typename A::BASE_TYPE ret; // = TT(0.0); if (concurrent) { int range = expr.Size(0) / std::thread::hardware_concurrency(); std::vector<std::thread> pool; std::vector<typename A::BASE_TYPE > temp(std::thread::hardware_concurrency()); for (int i = 0; i < std::thread::hardware_concurrency(); i++) { if (i < (std::thread::hardware_concurrency() - 1)) { // std::cout << i*range << " - " << (i + 1) * range << "\n"; pool.push_back(std::thread(SumThread<T2, A>, std::ref(temp[i]), i*range, (i + 1) * range, std::ref(expr))); } else { pool.push_back(std::thread(SumThread<T2, A>, std::ref(temp[i]), i*range, expr.Size(0), std::ref(expr))); // std::cout << i*range << " - " << expr.Size(0) << "\n"; } } for (int i = 0; i < pool.size(); i++) { pool[i].join(); } for (int i = 0; i < std::thread::hardware_concurrency(); i++) { ret += temp[i]; } } else { size_t s = expr.Size(0); size_t end = (((s - 1UL) & size_t(-2)) + 1UL); ret = expr(0); for (size_t i = 1UL; i < end; i += 2UL) { ret += expr(i) + expr(i + 1); } if (end < s) { ret += expr(end); } } return ret; }
/**
 * Dot product of two vector expressions over their common index range.
 *
 * The range runs from the larger of the two IndexMin() values to the smaller
 * of the two IndexMax() values, both inclusive; indices outside that overlap
 * do not contribute.
 *
 * @param a  left operand, read as a(i).
 * @param b  right operand, read as b(i).
 * @return the sum of a(i) * b(i) over the overlap; a value-initialized result
 *         when the ranges do not overlap.
 */
inline const typename atl::PromoteType<typename LHS::RET_TYPE, typename RHS::RET_TYPE >::return_type Dot(const VectorExpression<typename LHS::RET_TYPE, LHS>& a, const VectorExpression<typename RHS::RET_TYPE, RHS>& b) {
    typedef typename atl::PromoteType<typename LHS::BASE_TYPE, typename RHS::BASE_TYPE>::return_type R_TYPE;

    // Value-initialize the accumulator; it is only ever updated with "+=", so
    // a default-initialized built-in type would start from an indeterminate
    // value.
    R_TYPE sum = R_TYPE();

    int lmin = a.IndexMin();
    int lmax = a.IndexMax();
    int rmin = b.IndexMin();
    int rmax = b.IndexMax();

    // Intersection of the two index ranges.
    int min = std::max(lmin, rmin);
    int max = std::min(lmax, rmax);

    for (int i = min; i <= max; i++) {
        sum += a(i) * b(i);
    }

    return sum;
}
// Builds a vector from a vector expression: the length is taken from
// expr.Size(0), storage is resized to match, and each element is evaluated
// and stored in index order.
Vector(const VectorExpression<T2, A> &expr)
: isize(expr.Size(0)) {
    data_m.resize(isize);

    int idx = 0;
    while (idx < isize) {
        data_m[idx] = expr(idx);
        ++idx;
    }
}
// Builds a vector from a vector expression: storage is sized from
// other.size() and then filled by evaluating other[k] for each index in
// ascending order.
Vector(VectorExpression<E> const& other)
    : values(other.size())
{
    std::size_t k = 0;
    while (k < other.size())
    {
        values[k] = other[k];
        ++k;
    }
}