inline const typename A::BASE_TYPE Sum(const VectorExpression<T2, A> &expr, bool concurrent = false) { typename A::BASE_TYPE ret; // = TT(0.0); if (concurrent) { int range = expr.Size(0) / std::thread::hardware_concurrency(); std::vector<std::thread> pool; std::vector<typename A::BASE_TYPE > temp(std::thread::hardware_concurrency()); for (int i = 0; i < std::thread::hardware_concurrency(); i++) { if (i < (std::thread::hardware_concurrency() - 1)) { // std::cout << i*range << " - " << (i + 1) * range << "\n"; pool.push_back(std::thread(SumThread<T2, A>, std::ref(temp[i]), i*range, (i + 1) * range, std::ref(expr))); } else { pool.push_back(std::thread(SumThread<T2, A>, std::ref(temp[i]), i*range, expr.Size(0), std::ref(expr))); // std::cout << i*range << " - " << expr.Size(0) << "\n"; } } for (int i = 0; i < pool.size(); i++) { pool[i].join(); } for (int i = 0; i < std::thread::hardware_concurrency(); i++) { ret += temp[i]; } } else { size_t s = expr.Size(0); size_t end = (((s - 1UL) & size_t(-2)) + 1UL); ret = expr(0); for (size_t i = 1UL; i < end; i += 2UL) { ret += expr(i) + expr(i + 1); } if (end < s) { ret += expr(end); } } return ret; }
/**
 * Constructs the vector from an expression by evaluating it element-wise.
 *
 * The length is taken from expr.Size(0); storage is resized to that length
 * and every slot k is filled with expr(k).
 *
 * @param expr  Expression whose elements are copied into this vector.
 */
Vector(const VectorExpression<T2, A> &expr) : isize(expr.Size(0)) {
    data_m.resize(isize);

    int idx = 0;
    while (idx < isize) {
        data_m[idx] = expr(idx);
        ++idx;
    }
}