C++ (Cpp) DistMatrix::LocalRow Beispiele

Programmiersprache: C++ (Cpp)

Namespace / Paketname: el

Klasse / Typ: DistMatrix

Methode / Funktion: LocalRow

Beispiele auf hotexamples.com: 2

C++ (Cpp) DistMatrix::LocalRow – 2 Beispiele gefunden. Dies sind die am besten bewerteten C++ (Cpp) Beispiele für die Methode el::DistMatrix::LocalRow, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um ihre Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

Grid(11)

Buffer(9)

Height(7)

Width(6)

GetLocal(3)

LockedBuffer(3)

LocalWidth(2)

Matrix(2)

LockedMatrix(2)

LocalRow(2)

DistComm(1)

LocalHeight(1)

GlobalCol(1)

LocalRowOffset(1)

LDim(1)

IsLocalRow(1)

DistData(1)

Get(1)

Root(1)

SetLocal(1)

LocalCol(1)

Beispiel #1

Datei anzeigen

Datei: Gemm_detail.hpp Projekt: poulson/libskylark

inline void inner_panel_mixed_gemm_impl_nn(
        const double alpha,
        const SpParMat<index_type, value_type, SpDCCols<index_type, value_type> > &A,
        const El::DistMatrix<value_type, El::STAR, El::STAR> &S,
        const double beta,
        El::DistMatrix<value_type, col_d, El::STAR> &C) {

    int n_proc_side   = A.getcommgrid()->GetGridRows();
    int output_width  = S.Width();
    int output_height = A.getnrow();

    size_t rank = A.getcommgrid()->GetRank();
    size_t cb_row_offset = utility::cb_my_row_offset(A);

    typedef SpDCCols< index_type, value_type > col_t;
    typedef SpParMat< index_type, value_type, col_t > matrix_type;
    matrix_type &_A = const_cast<matrix_type&>(A);
    col_t &data = _A.seq();

    // 1) compute the local values still using the CombBLAS distribution (2D
    //    processor grid). We assume the result is dense.
    std::vector<double> local_matrix;
    mixed_gemm_local_part_nn(alpha, A, S, 0.0, local_matrix);

    // 2) reduce first along rows so that each processor owns the values in
    //    the output row of the SOMETHING/* matrix and values for processors in
    //    the same processor column.
    boost::mpi::communicator my_row_comm(
            A.getcommgrid()->GetRowWorld(), boost::mpi::comm_duplicate);

    // storage for other procs in same row communicator: rank -> (row, values)
    typedef std::vector<std::pair<int, std::vector<double> > > for_rank_t;
    std::vector<for_rank_t> for_rank(n_proc_side);

    for(size_t local_row = 0; local_row < data.getnrow(); ++local_row) {

        size_t row = local_row + cb_row_offset;

        // the owner for VR/* and VC/* matrices is independent of the column
        size_t target_proc = utility::owner(C, row, static_cast<size_t>(0));

        // if the target processor is not in the current row communicator, get
        // the value in the processor grid sharing the same row.
        if(!A.getcommgrid()->OnSameProcRow(target_proc))
            target_proc = static_cast<int>(rank / n_proc_side) *
                            n_proc_side + target_proc % n_proc_side;

        size_t target_row_rank = A.getcommgrid()->GetRankInProcRow(target_proc);

        // reduce partial row (FIXME: if the resulting matrix is still
        // expected to be sparse, change this to communicate only nnz).
        // Working on local_width columns concurrently per column processing
        // group.
        size_t local_width = S.Width();
        const value_type* buffer = &local_matrix[local_row * local_width];
        std::vector<value_type> new_values(local_width);
        boost::mpi::reduce(my_row_comm, buffer, local_width,
                &new_values[0], std::plus<value_type>(), target_row_rank);

        // processor stores result directly if it is the owning rank of that
        // row, save for subsequent communication along rows otherwise
        if(rank == utility::owner(C, row, static_cast<size_t>(0))) {
            int elem_lrow = C.LocalRow(row);
            for(size_t idx = 0; idx < local_width; ++idx) {
                int elem_lcol = C.LocalCol(idx);
                C.SetLocal(elem_lrow, elem_lcol,
                    new_values[idx] + beta * C.GetLocal(elem_lrow, elem_lcol));
            }
        } else if (rank == target_proc) {
            // store for later comm across rows
            for_rank[utility::owner(C, row, static_cast<size_t>(0)) / n_proc_side].push_back(
                    std::make_pair(row, new_values));
        }
    }

    // 3) gather remaining values along rows: we exchange all the values with
    //    other processors in the same communicator row and then add them to
    //    our local part.
    boost::mpi::communicator my_col_comm(
            A.getcommgrid()->GetColWorld(), boost::mpi::comm_duplicate);

    std::vector<for_rank_t> new_values;
    for(int i = 0; i < n_proc_side; ++i)
        boost::mpi::gather(my_col_comm, for_rank[i], new_values, i);

    // insert new values
    for(size_t proc = 0; proc < new_values.size(); ++proc) {
        const for_rank_t &cur  = new_values[proc];

        for(size_t i = 0; i < cur.size(); ++i) {
            int elem_lrow = C.LocalRow(cur[i].first);
            for(size_t j = 0; j < cur[i].second.size(); ++j) {
                size_t elem_lcol = C.LocalCol(j);
                C.SetLocal(elem_lrow, elem_lcol,
                        cur[i].second[j] + beta *
                        C.GetLocal(elem_lrow, elem_lcol));
            }
        }
    }
}

Beispiel #2

Datei anzeigen

Datei: Gemm_detail.hpp Projekt: poulson/libskylark

inline void outer_panel_mixed_gemm_impl_tn(
        const double alpha,
        const SpParMat<index_type, value_type, SpDCCols<index_type, value_type> > &A,
        const El::DistMatrix<value_type, col_d, El::STAR> &S,
        const double beta,
        El::DistMatrix<value_type, El::STAR, El::STAR> &C) {

    El::DistMatrix<value_type, El::STAR, El::STAR>
        tmp_C(C.Height(), C.Width());
    El::Zero(tmp_C);

    utility::combblas_slab_view_t<index_type, value_type> cbview(A, false);

    //FIXME: factor
    size_t slab_size = 2 * S.Grid().Height();
    for(size_t cur_row_idx = 0; cur_row_idx < cbview.ncols();
        cur_row_idx += slab_size) {

        size_t cur_slab_size =
            std::min(slab_size, cbview.ncols() - cur_row_idx);

        // get the next slab_size columns of B
        El::DistMatrix<value_type, El::STAR, El::STAR>
            A_row(cur_slab_size, S.Height());

        // transpose is column
        //cbview.extract_elemental_column_slab_view(A_row, cur_slab_size);
        cbview.extract_full_slab_view(cur_slab_size);

        // matrix mult (FIXME only iter nz)
        for(size_t l_row_idx = 0; l_row_idx < A_row.LocalHeight();
            ++l_row_idx) {

            size_t g_row_idx = l_row_idx * A_row.ColStride()
                               + A_row.ColShift() + cur_row_idx;

            for(size_t l_col_idx = 0; l_col_idx < A_row.LocalWidth();
                l_col_idx++) {

                //XXX: should be the same as l_col_idx
                size_t g_col_idx = l_col_idx * A_row.RowStride()
                                   + A_row.RowShift();

                // continue if we don't own values in S in this row
                if(!S.IsLocalRow(g_col_idx))
                    continue;

                //get transposed value
                value_type val = alpha * cbview(g_col_idx, g_row_idx);

                for(size_t s_col_idx = 0; s_col_idx < S.LocalWidth();
                    s_col_idx++) {

                    tmp_C.UpdateLocal(g_row_idx, s_col_idx,
                                val * S.GetLocal(S.LocalRow(g_col_idx), s_col_idx));
                }
            }
        }
    }

    //FIXME: scaling
    if(A.getcommgrid()->GetRank() == 0) {
        for(size_t col_idx = 0; col_idx < C.Width(); col_idx++)
            for(size_t row_idx = 0; row_idx < C.Height(); row_idx++)
                tmp_C.UpdateLocal(row_idx, col_idx,
                        beta * C.GetLocal(row_idx, col_idx));
    }

    //FIXME: Use utility getter
    boost::mpi::communicator world(
            A.getcommgrid()->GetWorld(), boost::mpi::comm_duplicate);
    boost::mpi::all_reduce (world,
                        tmp_C.LockedBuffer(),
                        C.Height() * C.Width(),
                        C.Buffer(),
                        std::plus<value_type>());
}