Ejemplo n.º 1
0
    // Entry point for this node: makes sure the scatter / block-stride arrays
    // are set up, then hands every argument (plus *this) to
    // matrify_and_run<Mat>.
    //
    // Mat selects which operand (A, B or C) supplies the two extents that
    // size those arrays.
    void operator()(const communicator& comm, const config& cfg,
                    T alpha, MatrixA& A, MatrixB& B, T beta, MatrixC& C)
    {
        using namespace matrix_constants;

        // Extents along dimension 0 and dimension 1 of the selected operand.
        len_type m;
        len_type n;
        if (Mat == MAT_A)
        {
            m = A.length(0);
            n = A.length(1);
        }
        else if (Mat == MAT_B)
        {
            m = B.length(0);
            n = B.length(1);
        }
        else
        {
            m = C.length(0);
            n = C.length(1);
        }

        // The arrays are set up only while rscat is still null.
        if (!rscat)
        {
            // Only the master of comm allocates; the buffer holds
            // 2*m + 2*n stride_type entries.
            if (comm.master())
            {
                scat_buffer = Pool.allocate<stride_type>(2*m + 2*n);
                rscat = scat_buffer.get<stride_type>();
            }

            // Presumably publishes the master's pointer to everyone in comm
            // -- confirm against communicator::broadcast.
            comm.broadcast(rscat);

            // Carve the buffer into four consecutive regions:
            // rscat (m entries), cscat (n), rbs (m), cbs (the remaining n).
            cscat = rscat + m;
            rbs   = cscat + n;
            cbs   = rbs + m;
        }

        matrify_and_run<Mat>(*this, comm, cfg, alpha, A, B, beta, C);
    }
Ejemplo n.º 2
0
    // Wraps B in a block_scatter_matrix and invokes parent.child with that
    // wrapper in B's argument position; comm, cfg, alpha, A, beta and C are
    // passed through unchanged.
    //
    // Dimension 0 is blocked by cfg.gemm_kr.def<T>() and dimension 1 by
    // cfg.gemm_nr.def<T>(). The scatter / block-stride arrays used are the
    // ones held in parent (rscat/rbs for dimension 0, cscat/cbs for
    // dimension 1).
    matrify_and_run(Parent& parent, const communicator& comm, const config& cfg,
                    T alpha, MatrixA& A, MatrixB& B, T beta, MatrixC& C)
    {
        const len_type row_block = cfg.gemm_kr.def<T>();
        const len_type col_block = cfg.gemm_nr.def<T>();

        auto& row_scat = parent.rscat;
        auto& row_bs   = parent.rbs;
        auto& col_scat = parent.cscat;
        auto& col_bs   = parent.cbs;

        // Disabled alternative that goes through the communicator:
        //block_scatter(comm, B, parent.rscat, MB, parent.rbs,
        //                       parent.cscat, NB, parent.cbs);

        // Presumably fills the scatter and block-stride arrays for the given
        // dimension of B -- confirm against fill_block_scatter.
        B.fill_block_scatter(0, row_scat, row_block, row_bs);
        B.fill_block_scatter(1, col_scat, col_block, col_bs);

        // View over B's data built from its extents and the arrays above.
        block_scatter_matrix<T> M(B.length(0), B.length(1), B.data(),
                                  row_scat, row_block, row_bs,
                                  col_scat, col_block, col_bs);

        parent.child(comm, cfg, alpha, A, M, beta, C);
    }
Ejemplo n.º 3
0
    // Entry point for this node: sets up one shared allocation that backs
    // both child's pack buffer and this node's scatter / block-stride
    // arrays, then delegates to Sib::operator() with the same arguments.
    //
    // Mat selects which operand (A, B or C) supplies the extents and which
    // pair of cfg block sizes they are rounded up to.
    void operator()(const communicator& comm, const config& cfg,
                    T alpha, MatrixA& A, MatrixB& B, T beta, MatrixC& C)
    {
        using namespace matrix_constants;

        const bool mat_is_a = (Mat == MAT_A);
        const bool mat_is_b = (Mat == MAT_B);

        // Rounding granularity: dimension 0 uses gemm_kr for B and gemm_mr
        // otherwise; dimension 1 uses gemm_kr for A and gemm_nr otherwise.
        const len_type MR = (mat_is_b ? cfg.gemm_kr.def<T>()
                                      : cfg.gemm_mr.def<T>());
        const len_type NR = (mat_is_a ? cfg.gemm_kr.def<T>()
                                      : cfg.gemm_nr.def<T>());

        // Extents of the selected operand, rounded up to MR / NR.
        len_type m;
        len_type n;
        if (mat_is_a)
        {
            m = A.length(0);
            n = A.length(1);
        }
        else if (mat_is_b)
        {
            m = B.length(0);
            n = B.length(1);
        }
        else
        {
            m = C.length(0);
            n = C.length(1);
        }
        m = round_up(m, MR);
        n = round_up(n, NR);

        auto& child_buffer = child.pack_buffer;
        auto& child_ptr = child.pack_ptr;

        // Setup happens only while child's pack pointer is still null.
        if (!child_ptr)
        {
            // Only the master of comm allocates.
            if (comm.master())
            {
                // Space for 2*m + 2*n stride_type entries, expressed as a
                // count of T elements.
                const len_type scat_elems = size_as_type<stride_type,T>(2*m + 2*n);
                child_buffer = Pool.allocate<T>(m*n
                                                + std::max(m,n)*TBLIS_MAX_UNROLL
                                                + scat_elems);
                child_ptr = child_buffer.get();
            }

            // Presumably publishes the master's pointer to everyone in comm
            // -- confirm against communicator::broadcast.
            comm.broadcast(child_ptr);

            // NOTE(review): the scatter arrays begin directly after the
            // first m*n elements, i.e. before the
            // std::max(m,n)*TBLIS_MAX_UNROLL term that the allocation also
            // includes. If that term is meant as overrun space for packing,
            // the two regions overlap -- confirm this is intended.
            T* const packed_end = static_cast<T*>(child_ptr) + m*n;
            rscat = convert_and_align<T,stride_type>(packed_end);

            // Consecutive regions: rscat (m entries), cscat (n), rbs (m),
            // then cbs.
            cscat = rscat + m;
            rbs   = cscat + n;
            cbs   = rbs + m;
        }

        Sib::operator()(comm, cfg, alpha, A, B, beta, C);
    }