/// Fills `indexed_of_mat` with the offset row index of every "set" entry of `mat`.
///
/// For each position (i, j) inside the shape reported by `mat.get_shape`:
///  - if mat(i, j) is greater than zero, or equals the largest value
///    representable by `MatrixA::ScalarType`, the destination receives
///    `i + base_index`;
///  - otherwise the destination receives `mat.get_zero()`.
///
/// @param mat             Source matrix; only read.
/// @param indexed_of_mat  Destination matrix; written at every visited (i, j).
/// @param base_index      Offset added to the row index before it is stored.
void index_of(MatrixA const &mat, MatrixB &indexed_of_mat, graphblas::IndexType base_index)
{
    using T = typename MatrixA::ScalarType;

    graphblas::IndexType rows, cols;
    mat.get_shape(rows, cols);

    for (IndexType row = 0; row < rows; ++row)
    {
        // Every set entry of this row is labelled with the same value.
        auto const row_label = row + base_index;

        for (IndexType col = 0; col < cols; ++col)
        {
            auto entry = mat.get_value_at(row, col);
            bool const is_set =
                (entry > 0) || (entry == std::numeric_limits<T>::max());

            if (!is_set)
            {
                // FIXME indexed_of_mat.get_zero()?
                indexed_of_mat.set_value_at(row, col, mat.get_zero());
                continue;
            }

            indexed_of_mat.set_value_at(row, col, row_label);
        }
    }
}
/// Prepares the scatter arrays (once) and forwards to matrify_and_run<Mat>.
///
/// m and n are length(0) and length(1) of the operand selected by `Mat`
/// (A for MAT_A, B for MAT_B, C otherwise).
///
/// While `rscat` is still null, the participant for which `comm.master()` is
/// true allocates 2*m + 2*n `stride_type` entries from `Pool`;
/// `comm.broadcast(rscat)` is then called (presumably publishing the pointer to
/// the rest of the communicator -- confirm against communicator). The buffer
/// is split into four consecutive arrays: rscat (m), cscat (n), rbs (m) and
/// cbs (the remaining n). Later calls reuse the arrays set up by the first one.
void operator()(const communicator& comm, const config& cfg,
                T alpha, MatrixA& A, MatrixB& B, T beta, MatrixC& C)
{
    using namespace matrix_constants;

    // Extents of whichever operand this node is responsible for.
    len_type m;
    len_type n;
    if (Mat == MAT_A)
    {
        m = A.length(0);
        n = A.length(1);
    }
    else if (Mat == MAT_B)
    {
        m = B.length(0);
        n = B.length(1);
    }
    else
    {
        m = C.length(0);
        n = C.length(1);
    }

    if (!rscat)
    {
        if (comm.master())
        {
            scat_buffer = Pool.allocate<stride_type>(2*m + 2*n);
            rscat = scat_buffer.get<stride_type>();
        }

        comm.broadcast(rscat);

        // Carve the single allocation into the four arrays.
        cscat = rscat + m;
        rbs   = cscat + n;
        cbs   = rbs + m;
    }

    matrify_and_run<Mat>(*this, comm, cfg, alpha, A, B, beta, C);
}
matrify_and_run(Parent& parent, const communicator& comm, const config& cfg,
                T alpha, MatrixA& A, MatrixB& B, T beta, MatrixC& C)
{
    // Wraps B in a block_scatter_matrix built on the scatter arrays owned by
    // `parent`, then invokes parent.child with that wrapper in place of B.
    // A, C, alpha and beta are passed through unchanged.

    // Block sizes for the two dimensions of B, taken from the configuration.
    const len_type row_block = cfg.gemm_kr.def<T>();
    const len_type col_block = cfg.gemm_nr.def<T>();

    // Disabled alternative, kept for reference:
    //block_scatter(comm, B, parent.rscat, row_block, parent.rbs,
    //                       parent.cscat, col_block, parent.cbs);

    // Fill the scatter / block-stride arrays for dimension 0, then dimension 1.
    B.fill_block_scatter(0, parent.rscat, row_block, parent.rbs);
    B.fill_block_scatter(1, parent.cscat, col_block, parent.cbs);

    block_scatter_matrix<T> scattered_B(B.length(0), B.length(1), B.data(),
                                        parent.rscat, row_block, parent.rbs,
                                        parent.cscat, col_block, parent.cbs);

    parent.child(comm, cfg, alpha, A, scattered_B, beta, C);
}
inline void smat_smat_mult(const MatrixA& a, const MatrixB& b, MatrixC& c, Assign, tag::col_major, // orientation a tag::row_major) // orientation b { if (Assign::init_to_zero) set_to_zero(c); // Average numbers of non-zeros per row double ava= double(a.nnz()) / num_rows(a), avb= double(b.nnz()) / num_rows(b); // Define Updater type corresponding to assign mode typedef typename Collection<MatrixC>::value_type c_value_type; typedef typename operations::update_assign_mode<Assign, c_value_type>::type Updater; // Reserve 20% over the average's product for entries in c matrix::inserter<MatrixC, Updater> ins(c, int( ava * avb * 1.2 )); typename traits::row<MatrixA>::type row_a(a); typename traits::col<MatrixA>::type col_a(a); typename traits::const_value<MatrixA>::type value_a(a); typename traits::row<MatrixB>::type row_b(b); typename traits::col<MatrixB>::type col_b(b); typename traits::const_value<MatrixB>::type value_b(b); typedef typename traits::range_generator<tag::col, MatrixA>::type a_cursor_type; a_cursor_type a_cursor = begin<tag::col>(a), a_cend = end<tag::col>(a); typedef typename traits::range_generator<tag::row, MatrixB>::type b_cursor_type; b_cursor_type b_cursor = begin<tag::row>(b); for (unsigned ca= 0; a_cursor != a_cend; ++ca, ++a_cursor, ++b_cursor) { // Iterate over non-zeros of A's column typedef typename traits::range_generator<tag::nz, a_cursor_type>::type ia_cursor_type; for (ia_cursor_type ia_cursor = begin<tag::nz>(a_cursor), ia_cend = end<tag::nz>(a_cursor); ia_cursor != ia_cend; ++ia_cursor) { typename Collection<MatrixA>::size_type ra= row_a(*ia_cursor); // row of non-zero typename Collection<MatrixA>::value_type va= value_a(*ia_cursor); // value of non-zero // Iterate over non-zeros of B's row typedef typename traits::range_generator<tag::nz, b_cursor_type>::type ib_cursor_type; for (ib_cursor_type ib_cursor = begin<tag::nz>(b_cursor), ib_cend = end<tag::nz>(b_cursor); ib_cursor != ib_cend; ++ib_cursor) { typename 
Collection<MatrixB>::size_type cb= col_b(*ib_cursor); // column of non-zero typename Collection<MatrixB>::value_type vb= value_b(*ib_cursor); // value of non-zero ins(ra, cb) << va * vb; } } } }
inline void smat_smat_mult(const MatrixA& A, const MatrixB& B, MatrixC& C, Assign, tag::col_major, // orientation A tag::row_major) // orientation B { if (Assign::init_to_zero) set_to_zero(C); // Average numbers of non-zeros per row double ava= double(A.nnz()) / num_rows(A), avb= double(B.nnz()) / num_rows(B); // Define Updater type corresponding to assign mode typedef typename Collection<MatrixC>::value_type C_value_type; typedef typename operations::update_assign_mode<Assign, C_value_type>::type Updater; // Reserve 20% over the average's product for entries in C matrix::inserter<MatrixC, Updater> ins(C, int( ava * avb * 1.2 )); typename traits::row<MatrixA>::type row_A(A); typename traits::col<MatrixA>::type col_A(A); typename traits::const_value<MatrixA>::type value_A(A); typename traits::row<MatrixB>::type row_B(B); typename traits::col<MatrixB>::type col_B(B); typename traits::const_value<MatrixB>::type value_B(B); typedef typename traits::range_generator<tag::col, MatrixA>::type A_cursor_type; A_cursor_type A_cursor = begin<tag::col>(A), A_cend = end<tag::col>(A); typedef typename traits::range_generator<tag::row, MatrixB>::type B_cursor_type; B_cursor_type B_cursor = begin<tag::row>(B); for (unsigned ca= 0; A_cursor != A_cend; ++ca, ++A_cursor, ++B_cursor) { // Iterate over non-zeros of A's column typedef typename traits::range_generator<tag::nz, A_cursor_type>::type ia_cursor_type; for (ia_cursor_type ia_cursor = begin<tag::nz>(A_cursor), ia_cend = end<tag::nz>(A_cursor); ia_cursor != ia_cend; ++ia_cursor) { typename Collection<MatrixA>::size_type ra= row_A(*ia_cursor); // row of non-zero typename Collection<MatrixA>::value_type va= value_A(*ia_cursor); // value of non-zero // Iterate over non-zeros of B's row typedef typename traits::range_generator<tag::nz, B_cursor_type>::type ib_cursor_type; for (ib_cursor_type ib_cursor = begin<tag::nz>(B_cursor), ib_cend = end<tag::nz>(B_cursor); ib_cursor != ib_cend; ++ib_cursor) { typename 
Collection<MatrixB>::size_type cb= col_B(*ib_cursor); // column of non-zero typename Collection<MatrixB>::value_type vb= value_B(*ib_cursor); // value of non-zero ins(ra, cb) << va * vb; } } } }
inline void smat_smat_mult(const MatrixA& A, const MatrixB& B, MatrixC& C, Assign, tag::row_major, // orientation A tag::row_major) // orientation B { if (Assign::init_to_zero) set_to_zero(C); // Average numbers of non-zeros per row double ava= num_cols(A) ? double(A.nnz()) / num_cols(A) : 0, avb= num_rows(B) ? double(B.nnz()) / num_rows(B) : 0; // Define Updater type corresponding to assign mode typedef typename Collection<MatrixC>::value_type C_value_type; typedef typename operations::update_assign_mode<Assign, C_value_type>::type Updater; // Reserve 20% over the average's product for entries in C matrix::inserter<MatrixC, Updater> ins(C, int( ava * avb * 1.4 )); typename traits::row<MatrixA>::type row_A(A); typename traits::col<MatrixA>::type col_A(A); typename traits::const_value<MatrixA>::type value_A(A); typename traits::col<MatrixB>::type col_B(B); typename traits::const_value<MatrixB>::type value_B(B); typedef typename traits::range_generator<tag::row, MatrixA>::type cursor_type; cursor_type cursor = begin<tag::row>(A), cend = end<tag::row>(A); for (unsigned ra= 0; cursor != cend; ++ra, ++cursor) { // Iterate over non-zeros of each row of A typedef typename traits::range_generator<tag::nz, cursor_type>::type icursor_type; for (icursor_type icursor = begin<tag::nz>(cursor), icend = end<tag::nz>(cursor); icursor != icend; ++icursor) { typename Collection<MatrixA>::size_type ca= col_A(*icursor); // column of non-zero typename Collection<MatrixA>::value_type va= value_A(*icursor); // value of non-zero // Get cursor corresponding to row 'ca' in matrix B typedef typename traits::range_generator<tag::row, MatrixB>::type B_cursor_type; B_cursor_type B_cursor = begin<tag::row>(B); B_cursor+= ca; // Iterate over non-zeros of this row typedef typename traits::range_generator<tag::nz, B_cursor_type>::type ib_cursor_type; for (ib_cursor_type ib_cursor = begin<tag::nz>(B_cursor), ib_cend = end<tag::nz>(B_cursor); ib_cursor != ib_cend; ++ib_cursor) { typename 
Collection<MatrixB>::size_type cb= col_B(*ib_cursor); // column of non-zero typename Collection<MatrixB>::value_type vb= value_B(*ib_cursor); // value of non-zero ins(ra, cb) << va * vb; } } } }
void operator()(const communicator& comm, const config& cfg, T alpha, MatrixA& A, MatrixB& B, T beta, MatrixC& C) { using namespace matrix_constants; const len_type MR = (Mat == MAT_B ? cfg.gemm_kr.def<T>() : cfg.gemm_mr.def<T>()); const len_type NR = (Mat == MAT_A ? cfg.gemm_kr.def<T>() : cfg.gemm_nr.def<T>()); len_type m = (Mat == MAT_A ? A.length(0) : Mat == MAT_B ? B.length(0) : C.length(0)); len_type n = (Mat == MAT_A ? A.length(1) : Mat == MAT_B ? B.length(1) : C.length(1)); m = round_up(m, MR); n = round_up(n, NR); auto& pack_buffer = child.pack_buffer; auto& pack_ptr = child.pack_ptr; if (!pack_ptr) { if (comm.master()) { len_type scatter_size = size_as_type<stride_type,T>(2*m + 2*n); pack_buffer = Pool.allocate<T>(m*n + std::max(m,n)*TBLIS_MAX_UNROLL + scatter_size); pack_ptr = pack_buffer.get(); } comm.broadcast(pack_ptr); rscat = convert_and_align<T,stride_type>(static_cast<T*>(pack_ptr) + m*n); cscat = rscat+m; rbs = cscat+n; cbs = rbs+m; } Sib::operator()(comm, cfg, alpha, A, B, beta, C); }