/**
 * Runs a parallel-for over a vertex set using a pool of fibers.
 *
 * Sequence of operations:
 *   1. Global barrier on the distributed_control instance.
 *   2. initialize_counters().
 *   3. A fiber_group is created with the requested stack size and
 *      `nfibers` fibers are launched; every fiber executes
 *      parfor_all_vertices_impl<GraphType>::run_fiber on one shared
 *      implementation object built from (graph, fn, vset).
 *   4. All fibers are joined, followed by a second global barrier.
 *   5. graph.synchronize(vset) is called.
 *
 * \param graph     Graph to operate on.
 * \param fn        User callable handed to the implementation object
 *                  (presumably invoked per vertex -- see
 *                  warp_impl::parfor_all_vertices_impl).
 * \param vset      Vertex set to restrict to; defaults to
 *                  GraphType::complete_set().
 * \param nfibers   Number of fibers to launch (default 10000).
 * \param stacksize Stack size passed to the fiber group (default 16384).
 */
void parfor_all_vertices(GraphType& graph,
                         FunctionType fn,
                         vertex_set vset = GraphType::complete_set(),
                         size_t nfibers = 10000,
                         size_t stacksize = 16384) {
  typedef warp_impl::parfor_all_vertices_impl<GraphType> impl_type;

  // Enter the parallel section together with every other caller.
  distributed_control::get_instance()->barrier();
  initialize_counters();

  fiber_group fibers;
  fibers.set_stacksize(stacksize);

  // One implementation object is shared (by pointer) among all fibers.
  impl_type shared_impl(graph, fn, vset);
  for (size_t remaining = nfibers; remaining > 0; --remaining) {
    fibers.launch(boost::bind(&impl_type::run_fiber, &shared_impl));
  }

  // Wait for all fibers, then rendezvous again before synchronizing.
  fibers.join();
  distributed_control::get_instance()->barrier();
  graph.synchronize(vset);
}
/**
 * Dot product of two Eigen vectors, parallelised for large inputs.
 *
 * When A.rows() is below 10 * chunkSize the result is simply A.dot(B).
 * Otherwise the index range [0, A.rows()) is split over the global
 * scheduler with parfor; each worker accumulates into its own slot
 * (indexed by worker_global::worker_id) and the slots are summed serially
 * at the end.
 *
 * Eigen's dot() is conjugate-linear in its first argument, so the parallel
 * path conjugates A(i) as well. This keeps the result independent of which
 * path is taken when the scalar type is complex; for real scalars the
 * conjugate is the identity and nothing changes.
 *
 * \param A         First vector (conjugated for complex scalars).
 * \param B         Second vector. The parallel path reads B(i) for every
 *                  i in [0, A.rows()) without a size check, so B must have
 *                  at least as many entries as A.
 * \param chunkSize Chunk size forwarded to parfor; also sets the
 *                  serial/parallel threshold (10 * chunkSize rows).
 * \return The dot product, of A's scalar type.
 */
auto pardot(Eigen::MatrixBase<V1> const& A,
            Eigen::MatrixBase<V2> const& B,
            int chunkSize = 1024)
{
    // Small problems: the parallel set-up is not worth it, use Eigen directly.
    if (A.rows() < 10 * chunkSize) {
        return A.dot(B);
    }

    using T = std::decay_t<decltype(A(0))>;
    struct r_struct { T val{0}; };

    // One accumulator per core, each written only by the worker whose id
    // indexes it.
    SmallVector<ReductionVariable<r_struct>, 8> S(yafel::config::num_cores, r_struct{0});

    parfor(0, A.rows(),
           [&](auto i) {
               auto id = worker_global::worker_id;
               // Conjugate the first operand to match A.dot(B) semantics.
               S[id].val += Eigen::numext::conj(A(i)) * B(i);
           },
           getGlobalScheduler(), chunkSize);

    // Serial reduction of the per-worker partial sums.
    T total{0};
    for (auto& s : S) {
        total += s.val;
    }
    return total;
}