/// Computes a row permutation for the distributed matrix A and wraps it
/// with graph_perm_matrix().
///
/// The target number of partitions is the number of ranks that own at least
/// one local row, divided by prm.shrink_ratio (never less than one).
///
/// @param A          Distributed matrix to repartition.
/// @param block_size When greater than one, the pointwise (block-collapsed)
///                   matrix is partitioned instead and the result is expanded
///                   back so that each group of block_size consecutive rows
///                   is moved together.
/// @return Result of graph_perm_matrix<Backend>() for the computed column
///         range and permutation.
std::shared_ptr<matrix> operator()(const matrix &A, unsigned block_size = 1) const {
    communicator comm = A.comm();
    idx_t n = A.loc_rows();
    ptrdiff_t row_beg = A.loc_col_shift();

    // A rank counts as active when it holds at least one local row.
    int active = (n > 0);
    int active_ranks = comm.reduce(MPI_SUM, active);

    idx_t npart = std::max(1, active_ranks / prm.shrink_ratio);

    if (comm.rank == 0) {
        std::cout << "Partitioning[ParMETIS] " << active_ranks << " -> " << npart << std::endl;
    }

    std::vector<ptrdiff_t> perm(n);
    ptrdiff_t col_beg, col_end;

    if (npart == 1) {
        // Single partition: rank 0 gets the range [0, glob_rows), every other
        // rank gets an empty range, and local rows map to their current
        // global indices.
        const ptrdiff_t total_rows = A.glob_rows();
        col_end = total_rows;
        col_beg = (comm.rank == 0) ? 0 : total_rows;

        ptrdiff_t global_row = row_beg;
        for (auto &target : perm) {
            target = global_row++;
        }
    } else if (block_size == 1) {
        std::tie(col_beg, col_end) = partition(A, npart, perm);
    } else {
        using scalar   = typename math::scalar_of<value_type>::type;
        using sbackend = backend::builtin<scalar>;

        // Number of whole blocks among the local rows.
        ptrdiff_t np = n / block_size;

        distributed_matrix<sbackend> A_pw(A.comm(),
                pointwise_matrix(*A.local(),  block_size),
                pointwise_matrix(*A.remote(), block_size)
                );

        std::vector<ptrdiff_t> perm_pw(np);
        std::tie(col_beg, col_end) = partition(A_pw, npart, perm_pw);

        // Convert the block-level range back to row-level indices.
        col_beg *= block_size;
        col_end *= block_size;

        // Expand every block-level destination into block_size consecutive
        // row-level destinations.
        ptrdiff_t dst = 0;
        for (ptrdiff_t block = 0; block < np; ++block) {
            ptrdiff_t src = perm_pw[block] * block_size;
            for (unsigned k = 0; k < block_size; ++k) {
                perm[dst++] = src++;
            }
        }
    }

    return graph_perm_matrix<Backend>(comm, col_beg, col_end, perm);
}
pointwise_aggregates(const Matrix &A, const params &prm) : count(0) { if (prm.block_size == 1) { plain_aggregates aggr(A, prm); count = aggr.count; strong_connection.swap(aggr.strong_connection); id.swap(aggr.id); } else { strong_connection.resize( nonzeros(A) ); id.resize( rows(A) ); Matrix Ap = pointwise_matrix(A, prm.block_size); plain_aggregates pw_aggr(Ap, prm); count = pw_aggr.count * prm.block_size; #pragma omp parallel { std::vector<ptrdiff_t> marker(Ap.nrows, -1); #ifdef _OPENMP int nt = omp_get_num_threads(); int tid = omp_get_thread_num(); size_t chunk_size = (Ap.nrows + nt - 1) / nt; size_t chunk_start = tid * chunk_size; size_t chunk_end = std::min(Ap.nrows, chunk_start + chunk_size); #else size_t chunk_start = 0; size_t chunk_end = Ap.nrows; #endif for(size_t ip = chunk_start, ia = ip * prm.block_size; ip < chunk_end; ++ip) { ptrdiff_t row_beg = Ap.ptr[ip]; ptrdiff_t row_end = row_beg; for(unsigned k = 0; k < prm.block_size; ++k, ++ia) { id[ia] = prm.block_size * pw_aggr.id[ip] + k; for(ptrdiff_t ja = A.ptr[ia], ea = A.ptr[ia+1]; ja < ea; ++ja) { ptrdiff_t cp = A.col[ja] / prm.block_size; if (marker[cp] < row_beg) { marker[cp] = row_end; strong_connection[ja] = pw_aggr.strong_connection[row_end]; ++row_end; } else { strong_connection[ja] = pw_aggr.strong_connection[ marker[cp] ]; } } } } } } }