void compare_result(size_t rank, DistMatrixType &expected_A,
                    El::DistMatrix<double, El::STAR, El::STAR> &result) {

    col_t &data = expected_A.seq();

    const size_t my_row_offset = skylark::utility::cb_my_row_offset(expected_A);
    const size_t my_col_offset = skylark::utility::cb_my_col_offset(expected_A);

    for(typename col_t::SpColIter col = data.begcol();
        col != data.endcol(); col++) {
        for(typename col_t::SpColIter::NzIter nz = data.begnz(col);
            nz != data.endnz(col); nz++) {

            const size_t rowid = nz.rowid()  + my_row_offset;
            const size_t colid = col.colid() + my_col_offset;
            const double value = nz.value();

            if(value != result.GetLocal(rowid, colid)) {
                std::ostringstream os;
                os << rank << ": " << rowid << ", " << colid << ": "
                   << value << " != "
                   << result.GetLocal(rowid, colid) << std::endl;
                std::cout << os.str() << std::flush;
                BOOST_FAIL("Result application not as expected");
            }
        }
    }
}
Example #2
0
void SymmetricEuclideanDistanceMatrix(El::UpperOrLower uplo, direction_t dir,
    T alpha, const El::ElementalMatrix<T> &A,
    T beta, El::ElementalMatrix<T> &C) {

    T *c = C.Buffer();
    int ldC = C.LDim();

    if (dir == base::COLUMNS) {
        El::Herk(uplo, El::ADJOINT, -2.0 * alpha, A, beta, C);
        //El::Gemm(El::ADJOINT, El::NORMAL, T(-2.0) * alpha, A, A, beta, C); 

        El::DistMatrix<El::Base<T>, El::STAR, El::STAR > N;
        ColumnNrm2(A, N);
        El::Base<T> *nn = N.Buffer();;

        El::Int n = C.LocalWidth();
        El::Int m = C.LocalHeight();

        for(El::Int j = 0; j < n; j++)
            for(El::Int i =
                    ((uplo == El::UPPER) ? 0 : C.LocalRowOffset(A.GlobalCol(j)));
                i < ((uplo == El::UPPER) ? C.LocalRowOffset(A.GlobalCol(j) + 1) : m); i++) {

                El::Base<T> a = nn[C.GlobalRow(i)];
                El::Base<T> b = nn[C.GlobalCol(j)];
                c[j * ldC + i] += alpha * (a * a + b * b);
            }
    }

    // TODO the rest of the cases.
}
Example #3
0
inline void mixed_gemm_local_part_nn (
        const double alpha,
        const SpParMat<index_type, value_type, SpDCCols<index_type, value_type> > &A,
        const El::DistMatrix<value_type, El::STAR, El::STAR> &S,
        const double beta,
        std::vector<value_type> &local_matrix) {

    typedef SpDCCols< index_type, value_type > col_t;
    typedef SpParMat< index_type, value_type, col_t > matrix_type;
    matrix_type &_A = const_cast<matrix_type&>(A);
    col_t &data = _A.seq();

    //FIXME
    local_matrix.resize(S.Width() * data.getnrow(), 0);
    size_t cb_col_offset = utility::cb_my_col_offset(A);

    for(typename col_t::SpColIter col = data.begcol();
        col != data.endcol(); col++) {
        for(typename col_t::SpColIter::NzIter nz = data.begnz(col);
            nz != data.endnz(col); nz++) {

            // we want local index here to fill local dense matrix
            index_type rowid = nz.rowid();
            // column needs to be global
            index_type colid = col.colid() + cb_col_offset;

            // compute application of S to yield a partial row in the result.
            for(size_t bcol = 0; bcol < S.Width(); ++bcol) {
                local_matrix[rowid * S.Width() + bcol] +=
                        alpha * S.Get(colid, bcol) * nz.value();
            }
        }
    }
}
Example #4
0
inline void Gemv(El::Orientation oA,
    T alpha, const El::DistMatrix<T, El::VC, El::STAR>& A,
    const El::DistMatrix<T, El::STAR, El::STAR>& x,
    El::DistMatrix<T, El::VC, El::STAR>& y) {

    int y_height = (oA == El::NORMAL ? A.Height() : A.Width());
    El::Zeros(y, y_height, 1);
    base::Gemv(oA, alpha, A, x, T(0), y);
}
Example #5
0
inline void Gemv(El::Orientation oA,
    T alpha, const El::DistMatrix<T, El::VC, El::STAR>& A,
    const El::DistMatrix<T, El::STAR, El::STAR>& x,
    T beta, El::DistMatrix<T, El::VC, El::STAR>& y) {
    // TODO verify sizes etc.

    if (oA == El::NORMAL) {
        El::Gemv(El::NORMAL,
            alpha, A.LockedMatrix(), x.LockedMatrix(),
            beta, y.Matrix());
    } else {
        SKYLARK_THROW_EXCEPTION(base::unsupported_base_operation());
    }
}
Example #6
0
inline void outer_panel_mixed_gemm_impl_nn(
        const double alpha,
        const SpParMat<index_type, value_type, SpDCCols<index_type, value_type> > &A,
        const El::DistMatrix<value_type, El::STAR, El::STAR> &S,
        const double beta,
        El::DistMatrix<value_type, col_d, El::STAR> &C) {

    utility::combblas_slab_view_t<index_type, value_type> cbview(A, false);

    //FIXME: factor
    size_t slab_size = 2 * C.Grid().Height();
    for(size_t cur_row_idx = 0; cur_row_idx < cbview.nrows();
        cur_row_idx += slab_size) {

        size_t cur_slab_size =
            std::min(slab_size, cbview.nrows() - cur_row_idx);

        // get the next slab_size columns of B
        El::DistMatrix<value_type, col_d, El::STAR>
            A_row(cur_slab_size, S.Height());

        cbview.extract_elemental_row_slab_view(A_row, cur_slab_size);

        // assemble the distributed column vector
        for(size_t l_col_idx = 0; l_col_idx < A_row.LocalWidth();
            l_col_idx++) {

            size_t g_col_idx = l_col_idx * A_row.RowStride()
                               + A_row.RowShift();

            for(size_t l_row_idx = 0; l_row_idx < A_row.LocalHeight();
                ++l_row_idx) {

                size_t g_row_idx = l_row_idx * A_row.ColStride()
                                   + A_row.ColShift() + cur_row_idx;

                A_row.SetLocal(l_row_idx, l_col_idx,
                               cbview(g_row_idx, g_col_idx));
            }
        }

        El::DistMatrix<value_type, col_d, El::STAR>
            C_slice(cur_slab_size, C.Width());
        El::View(C_slice, C, cur_row_idx, 0, cur_slab_size, C.Width());
        El::LocalGemm(El::NORMAL, El::NORMAL, alpha, A_row, S,
                        beta, C_slice);
    }
}
Example #7
0
inline void Gemv(El::Orientation oA,
    T alpha, const El::DistMatrix<T, El::VC, El::STAR>& A,
    const El::DistMatrix<T, El::VC, El::STAR>& x,
    T beta, El::DistMatrix<T, El::STAR, El::STAR>& y) {
    // TODO verify sizes etc.
    // TODO verify matching grids.

    if (oA == El::TRANSPOSE) {
        boost::mpi::communicator comm(y.Grid().Comm().comm,
            boost::mpi::comm_attach);
        El::Matrix<T> ylocal(y.Matrix());
        El::Gemv(El::TRANSPOSE,
            alpha, A.LockedMatrix(), x.LockedMatrix(),
            beta / T(comm.size()), ylocal);
        boost::mpi::all_reduce(comm,
            ylocal.Buffer(), ylocal.MemorySize(), y.Buffer(),
            std::plus<T>());
    } else {
        SKYLARK_THROW_EXCEPTION(base::unsupported_base_operation());
    }
}
/*
 * Convert an elemental STAR, STAR distributed vector toa  std::vector
 */
int elstar2vec(const El::DistMatrix<El::Complex<double>,El::STAR,El::STAR> &Y, std::vector<double> &vec){

	const El::Complex<double> *Y_ptr = Y.LockedBuffer();
	int sz = vec.size()/2;
	#pragma omp parallel for
	for(int i=0;i<sz; i++){
		vec[2*i] = El::RealPart(Y_ptr[i]);
		vec[2*i+1] = El::ImagPart(Y_ptr[i]);
	}

	return 0;
}
/*
 * Convert a std::vector to an elemental STAR, STAR distributed vector
 */
int vec2elstar(const std::vector<double> &vec, El::DistMatrix<El::Complex<double>,El::STAR,El::STAR > &Y){

	El::Complex<double> *Y_ptr = Y.Buffer();
	int sz = vec.size()/2;
	#pragma omp parallel for
	for(int i=0;i<sz; i++){
		double real = vec[2*i];
		double imag = vec[2*i+1];
		El::SetRealPart(Y_ptr[i],real);
		El::SetImagPart(Y_ptr[i],imag);
	}

	return 0;
}
int elemental2vec(const El::DistMatrix<El::Complex<double>,El::VC,El::STAR> &Y, std::vector<double> &vec){
	
	assert((Y.DistData().colDist == El::STAR) and (Y.DistData().rowDist == El::VC));

	int data_dof=2;
	int SCAL_EXP = 1;

	//double *pt_array,*pt_perm_array;
	int r,q,ll,rq; // el vec info
	int nbigs; //Number of large recv (i.e. recv 1 extra data point)
	int pstart; // p_id of nstart
	int rank = El::mpi::WorldRank(); //p_id
	int recv_size; // base recv size
	bool print = (rank == -1); 

	// Get el vec info
	ll = Y.Height();
	const El::Grid* g = &(Y.Grid());
	r = g->Height();
	q = g->Width();
	MPI_Comm comm = (g->Comm()).comm;

	int cheb_deg = InvMedTree<FMM_Mat_t>::cheb_deg;
	int omp_p=omp_get_max_threads();
	size_t n_coeff3=(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3)/6;
	
	// Get petsc vec params
	//VecGetLocalSize(pt_vec,&nlocal);
	int nlocal = (vec.size())/data_dof;
	if(print) std::cout << "m: " << std::endl;
	int nstart = 0;
	//VecGetArray(pt_vec,&pt_array);
	//VecGetOwnershipRange(pt_vec,&nstart,NULL);
	MPI_Exscan(&nlocal,&nstart,1,MPI_INT,MPI_SUM,comm);

	// Determine who owns the first element we want
	rq = r * q;
	pstart = nstart % rq;
	nbigs = nlocal % rq;
	recv_size = nlocal / rq;
	
	if(print){
		std::cout << "r: " << r << " q: " << q <<std::endl;
		std::cout << "nstart: " << nstart << std::endl;
		std::cout << "ps: " << pstart << std::endl;
		std::cout << "nbigs: " << nbigs << std::endl;
		std::cout << "recv_size: " << recv_size << std::endl;
	}

	// Make recv sizes
	std::vector<int> recv_lengths(rq);
	std::fill(recv_lengths.begin(),recv_lengths.end(),recv_size);
	if(nbigs >0){
		for(int i=0;i<nbigs;i++){
			recv_lengths[(pstart + i) % rq] += 1;
		}
	}

	// Make recv disps
	std::vector<int> recv_disps = exscan(recv_lengths);

	// All2all to get send sizes
	std::vector<int> send_lengths(rq);
	MPI_Alltoall(&recv_lengths[0], 1, MPI_INT, &send_lengths[0], 1, MPI_INT,comm);

	// Scan to get send_disps
	std::vector<int> send_disps = exscan(send_lengths);

	// Do all2allv to get data on correct processor
	std::vector<El::Complex<double>> recv_data(nlocal);
	std::vector<El::Complex<double>> recv_data_ordered(nlocal);
	//MPI_Alltoallv(el_vec.Buffer(),&send_lengths[0],&send_disps[0],MPI_DOUBLE, \
			&recv_data[0],&recv_lengths[0],&recv_disps[0],MPI_DOUBLE,comm);
	El::mpi::AllToAll(Y.LockedBuffer(), &send_lengths[0], &send_disps[0], &recv_data[0],&recv_lengths[0],&recv_disps[0],comm);
	
	if(print){
		//std::cout << "Send data: " <<std::endl << *el_vec.Buffer() <<std::endl;
		std::cout << "Send lengths: " <<std::endl << send_lengths <<std::endl;
		std::cout << "Send disps: " <<std::endl << send_disps <<std::endl;
		std::cout << "Recv data: " <<std::endl << recv_data <<std::endl;
		std::cout << "Recv lengths: " <<std::endl << recv_lengths <<std::endl;
		std::cout << "Recv disps: " <<std::endl << recv_disps <<std::endl;
	}
	
	// Reorder the data so taht it is in the right order for the fmm tree
	for(int p=0;p<rq;p++){
		int base_idx = (p - pstart + rq) % rq;
		int offset = recv_disps[p];
		for(int i=0;i<recv_lengths[p];i++){
			recv_data_ordered[base_idx + rq*i] = recv_data[offset + i];
		}
	}

	// loop through and put the data into the vector
	#pragma omp parallel for
	for(int i=0;i<nlocal; i++){
		vec[2*i] = El::RealPart(recv_data_ordered[i]);
		vec[2*i+1] = El::ImagPart(recv_data_ordered[i]);
	}

	if(print){std::cout <<"here?"<<std::endl;}

	return 0;

}
Example #11
0
int Width(const El::DistMatrix<T, U, V>& A) {
    return A.Width();
}
Example #12
0
 static type build_compatible(int m, int n, const El::DistMatrix<F, U, V>& A) {
     return type(m, n, A.Grid(), A.Root());
 }
index_type owner(const El::DistMatrix<value_type, El::VR, El::STAR> &A,
                 const index_type row_idx, const index_type col_idx) {

    return (row_idx / A.Grid().Width()) % A.Grid().Height() +
           (row_idx % A.Grid().Width()) * A.Grid().Height();
}
int vec2elemental(const std::vector<double> &vec, El::DistMatrix<El::Complex<double>,El::VC,El::STAR> &Y){

	int data_dof=2;
	int SCAL_EXP = 1;

	int nlocal,gsize; //local elements, start p_id, global size
	double *pt_array; // will hold local array
	int r,q,rq; //Grid sizes
	int nbigs; //Number of large sends (i.e. send 1 extra data point)
	int pstart; // p_id of nstart
	int rank = El::mpi::WorldRank(); //p_id
	int send_size; // base send size
	bool print = rank == -1; 


	// Get Grid and associated params
	const El::Grid* g = &(Y.Grid());
	r = g->Height();
	q = g->Width();
	MPI_Comm comm = (g->Comm()).comm;

	// Get sizes, array in petsc 
	nlocal = vec.size()/data_dof;
	int nstart = 0;
	MPI_Exscan(&nlocal,&nstart,1,MPI_INT,MPI_SUM,comm);
	//VecGetOwnershipRange(pt_vec,&nstart,NULL);

	//Find processor that nstart belongs to, number of larger sends
	rq = r * q;
	pstart = nstart % rq; //int div
	nbigs = nlocal % rq;
	send_size = nlocal/rq;
	
	if(print){
		std::cout << "r: " << r << " q: " << q <<std::endl;
		std::cout << "nstart: " << nstart << std::endl;
		std::cout << "ps: " << pstart << std::endl;
		std::cout << "nbigs: " << nbigs << std::endl;
		std::cout << "send_size: " << send_size << std::endl;
	}

	// Make send_lengths
	std::vector<int> send_lengths(rq);
	std::fill(send_lengths.begin(),send_lengths.end(),send_size);
	if(nbigs >0){
		for(int j=0;j<nbigs;j++){
			send_lengths[(pstart + j) % rq] += 1;
		}
	}

	// Make send_disps
	std::vector<int> send_disps = exscan(send_lengths);

	std::vector<El::Complex<double>> indata(nlocal);
	// copy the data from an ffm tree to into a local vec of complex data for sending #pragma omp parallel for
	El::Complex<double> val;
	for(int i=0;i<nlocal;i++){
		El::SetRealPart(val,vec[2*i+0]);
		El::SetImagPart(val,vec[2*i+1]);
		indata[i] = val;
	}


	// Make send_dataA, i.e. reorder the data
	std::vector<El::Complex<double>> send_data(nlocal);
	for(int proc=0;proc<rq;proc++){
		int offset = send_disps[proc];
		int base_idx = (proc - pstart + rq) % rq; 
		for(int j=0; j<send_lengths[proc]; j++){
			int idx = base_idx + (j * rq);
			send_data[offset + j] = indata[idx];
		}
	}

	// Do all2all to get recv_lengths
	std::vector<int> recv_lengths(rq);
	MPI_Alltoall(&send_lengths[0], 1, MPI_INT, &recv_lengths[0], 1, MPI_INT,comm);

	// Scan to get recv_disps
	std::vector<int> recv_disps = exscan(recv_lengths);

	// Do all2allv to get data on correct processor
	El::Complex<double> * recv_data = Y.Buffer();
	//MPI_Alltoallv(&send_data[0],&send_lengths[0],&send_disps[0],MPI_DOUBLE, \
	//		&recv_data[0],&recv_lengths[0],&recv_disps[0],MPI_DOUBLE,comm);
	El::mpi::AllToAll(&send_data[0], &send_lengths[0], &send_disps[0], recv_data,&recv_lengths[0],&recv_disps[0],comm);

	if(print){
		std::cout << "Send data: " <<std::endl << send_data <<std::endl;
		std::cout << "Send lengths: " <<std::endl << send_lengths <<std::endl;
		std::cout << "Send disps: " <<std::endl << send_disps <<std::endl;
		std::cout << "Recv data: " <<std::endl << recv_data <<std::endl;
		std::cout << "Recv lengths: " <<std::endl << recv_lengths <<std::endl;
		std::cout << "Recv disps: " <<std::endl << recv_disps <<std::endl;
	}

	return 0;
}
int tree2elemental(InvMedTree<FMM_Mat_t> *tree, El::DistMatrix<T,El::VC,El::STAR> &Y){

	int data_dof=2;
	int SCAL_EXP = 1;

	int nlocal,gsize; //local elements, start p_id, global size
	double *pt_array; // will hold local array
	int r,q,rq; //Grid sizes
	int nbigs; //Number of large sends (i.e. send 1 extra data point)
	int pstart; // p_id of nstart
	int rank = El::mpi::WorldRank(); //p_id
	int send_size; // base send size
	bool print = rank == -1; 


	// Get Grid and associated params
	const El::Grid* g = &(Y.Grid());
	r = g->Height();
	q = g->Width();
	MPI_Comm comm = (g->Comm()).comm;

	std::vector<FMMNode_t*> nlist = tree->GetNGLNodes();

	int cheb_deg = InvMedTree<FMM_Mat_t>::cheb_deg;
	int omp_p=omp_get_max_threads();
	size_t n_coeff3=(cheb_deg+1)*(cheb_deg+2)*(cheb_deg+3)/6;

	// Get sizes, array in petsc 
	//VecGetSize(pt_vec,&gsize);
	gsize = tree->M/data_dof;
	nlocal = tree->m/data_dof;
	//VecGetLocalSize(pt_vec,&nlocal);
	//VecGetArray(pt_vec,&pt_array);
	int nstart = 0;
	MPI_Exscan(&nlocal,&nstart,1,MPI_INT,MPI_SUM,comm);
	//VecGetOwnershipRange(pt_vec,&nstart,NULL);

	//Find processor that nstart belongs to, number of larger sends
	rq = r * q;
	pstart = nstart % rq; //int div
	nbigs = nlocal % rq;
	send_size = nlocal/rq;
	
	if(print){
		std::cout << "r: " << r << " q: " << q <<std::endl;
		std::cout << "nstart: " << nstart << std::endl;
		std::cout << "ps: " << pstart << std::endl;
		std::cout << "nbigs: " << nbigs << std::endl;
		std::cout << "send_size: " << send_size << std::endl;
	}

	// Make send_lengths
	std::vector<int> send_lengths(rq);
	std::fill(send_lengths.begin(),send_lengths.end(),send_size);
	if(nbigs >0){
		for(int j=0;j<nbigs;j++){
			send_lengths[(pstart + j) % rq] += 1;
		}
	}

	// Make send_disps
	std::vector<int> send_disps = exscan(send_lengths);

	std::vector<El::Complex<double>> indata(nlocal);
	// copy the data from an ffm tree to into a local vec of complex data for sending #pragma omp parallel for
	for(size_t tid=0;tid<omp_p;tid++){
		size_t i_start=(nlist.size()* tid   )/omp_p;
		size_t i_end  =(nlist.size()*(tid+1))/omp_p;
		for(size_t i=i_start;i<i_end;i++){
			pvfmm::Vector<double>& coeff_vec=nlist[i]->ChebData();
			double s=std::pow(0.5,COORD_DIM*nlist[i]->Depth()*0.5*SCAL_EXP);

			size_t offset=i*n_coeff3;
			for(size_t j=0;j<n_coeff3;j++){
				double real = coeff_vec[j]*s; // local indices as in the pvfmm trees
				double imag = coeff_vec[j+n_coeff3]*s;
				El::Complex<double> coeff;
				El::SetRealPart(coeff,real);
				El::SetImagPart(coeff,imag);

				indata[offset+j] = coeff;
			}
		}
	}


	// Make send_data
	std::vector<El::Complex<double>> send_data(nlocal);
	for(int proc=0;proc<rq;proc++){
		int offset = send_disps[proc];
		int base_idx = (proc - pstart + rq) % rq; 
		for(int j=0; j<send_lengths[proc]; j++){
			int idx = base_idx + (j * rq);
			send_data[offset + j] = indata[idx];
		}
	}

	// Do all2all to get recv_lengths
	std::vector<int> recv_lengths(rq);
	MPI_Alltoall(&send_lengths[0], 1, MPI_INT, &recv_lengths[0], 1, MPI_INT,comm);

	// Scan to get recv_disps
	std::vector<int> recv_disps = exscan(recv_lengths);

	// Do all2allv to get data on correct processor
	El::Complex<double> * recv_data = Y.Buffer();
	//MPI_Alltoallv(&send_data[0],&send_lengths[0],&send_disps[0],MPI_DOUBLE, \
	//		&recv_data[0],&recv_lengths[0],&recv_disps[0],MPI_DOUBLE,comm);
	El::mpi::AllToAll(&send_data[0], &send_lengths[0], &send_disps[0], recv_data,&recv_lengths[0],&recv_disps[0],comm);

	if(print){
		std::cout << "Send data: " <<std::endl << send_data <<std::endl;
		std::cout << "Send lengths: " <<std::endl << send_lengths <<std::endl;
		std::cout << "Send disps: " <<std::endl << send_disps <<std::endl;
		std::cout << "Recv data: " <<std::endl << recv_data <<std::endl;
		std::cout << "Recv lengths: " <<std::endl << recv_lengths <<std::endl;
		std::cout << "Recv disps: " <<std::endl << recv_disps <<std::endl;
	}

	return 0;
}
Example #16
0
void El2Petsc_vec(El::DistMatrix<double,VC,STAR>& el_vec,Vec& pt_vec){
	PetscInt nlocal, nstart; // petsc vec info
	PetscScalar *pt_array,*pt_perm_array;
	int r,q,ll,rq; // el vec info
	int nbigs; //Number of large recv (i.e. recv 1 extra data point)
	int pstart; // p_id of nstart
	int p = El::mpi::WorldRank(); //p_id
	int recv_size; // base recv size
	bool print = p == -1; 

	// Get el vec info
	ll = el_vec.Height();
	const El::Grid* g = &(el_vec.Grid());
	r = g->Height();
	q = g->Width();
	MPI_Comm comm = (g->Comm()).comm;
	
	// Get petsc vec params
	VecGetLocalSize(pt_vec,&nlocal);
	VecGetArray(pt_vec,&pt_array);
	VecGetOwnershipRange(pt_vec,&nstart,NULL);

	// Determine who owns the first element we want
	rq = r * q;
	pstart = nstart % rq;
	nbigs = nlocal % rq;
	recv_size = nlocal / rq;
	
	if(print){
		std::cout << "r: " << r << " q: " << q <<std::endl;
		std::cout << "nstart: " << nstart << std::endl;
		std::cout << "ps: " << pstart << std::endl;
		std::cout << "nbigs: " << nbigs << std::endl;
		std::cout << "recv_size: " << recv_size << std::endl;
	}

	// Make recv sizes
	std::vector<int> recv_lengths(rq);
	std::fill(recv_lengths.begin(),recv_lengths.end(),recv_size);
	if(nbigs >0){
		for(int i=0;i<nbigs;i++){
			recv_lengths[(pstart + i) % rq] += 1;
		}
	}

	// Make recv disps
	std::vector<int> recv_disps = exscan(recv_lengths);

	// All2all to get send sizes
	std::vector<int> send_lengths(rq);
	MPI_Alltoall(&recv_lengths[0], 1, MPI_INT, &send_lengths[0], 1, MPI_INT,comm);

	// Scan to get send_disps
	std::vector<int> send_disps = exscan(send_lengths);

	// Do all2allv to get data on correct processor
	std::vector<double> recv_data(nlocal);
	MPI_Alltoallv(el_vec.Buffer(),&send_lengths[0],&send_disps[0],MPI_DOUBLE, \
			&recv_data[0],&recv_lengths[0],&recv_disps[0],MPI_DOUBLE,comm);
	
	if(print){
		//std::cout << "Send data: " <<std::endl << *el_vec.Buffer() <<std::endl;
		std::cout << "Send lengths: " <<std::endl << send_lengths <<std::endl;
		std::cout << "Send disps: " <<std::endl << send_disps <<std::endl;
		std::cout << "Recv data: " <<std::endl << recv_data <<std::endl;
		std::cout << "Recv lengths: " <<std::endl << recv_lengths <<std::endl;
		std::cout << "Recv disps: " <<std::endl << recv_disps <<std::endl;
	}
	
	// Reorder for petsc
	for(int p=0;p<rq;p++){
		int base_idx = (p - pstart + rq) % rq;
		int offset = recv_disps[p];
		for(int i=0;i<recv_lengths[p];i++){
			pt_array[base_idx + rq*i] = recv_data[offset + i];
		}
	}

	// Copy into array
	VecRestoreArray(pt_vec,&pt_array);
}
Example #17
0
void Petsc2El_vec(Vec& pt_vec,El::DistMatrix<double,VC,STAR>& el_vec){
	PetscInt nlocal,nstart,gsize; //local elements, start p_id, global size
	PetscScalar *pt_array; // will hold local array
	int r,q,rq; //Grid sizes
	int nbigs; //Number of large sends (i.e. send 1 extra data point)
	int pstart; // p_id of nstart
	int p = El::mpi::WorldRank(); //p_id
	int send_size; // base send size
	bool print = p == -1; 


	// Get Grid and associated params
	const El::Grid* g = &(el_vec.Grid());
	r = g->Height();
	q = g->Width();
	MPI_Comm comm = (g->Comm()).comm;

	// Get sizes, array in petsc 
	VecGetSize(pt_vec,&gsize);
	VecGetLocalSize(pt_vec,&nlocal);
	VecGetArray(pt_vec,&pt_array);
	VecGetOwnershipRange(pt_vec,&nstart,NULL);

	//Find processor that nstart belongs to, number of larger sends
	rq = r * q;
	pstart = nstart % rq; //int div
	nbigs = nlocal % rq;
	send_size = nlocal/rq;
	
	if(print){
		std::cout << "r: " << r << " q: " << q <<std::endl;
		std::cout << "nstart: " << nstart << std::endl;
		std::cout << "ps: " << pstart << std::endl;
		std::cout << "nbigs: " << nbigs << std::endl;
		std::cout << "send_size: " << send_size << std::endl;
	}

	// Make send_lengths
	std::vector<int> send_lengths(rq);
	std::fill(send_lengths.begin(),send_lengths.end(),send_size);
	if(nbigs >0){
		for(int j=0;j<nbigs;j++){
			send_lengths[(pstart + j) % rq] += 1;
		}
	}

	// Make send_disps
	std::vector<int> send_disps = exscan(send_lengths);

	// Make send_data
	std::vector<double> send_data(nlocal);
	for(int proc=0;proc<rq;proc++){
		int offset = send_disps[proc];
		int base_idx = (proc - pstart + rq) % rq; 
		for(int j=0; j<send_lengths[proc]; j++){
			int idx = base_idx + (j * rq);
			send_data[offset + j] = pt_array[idx];
		}
	}

	// Do all2all to get recv_lengths
	std::vector<int> recv_lengths(rq);
	MPI_Alltoall(&send_lengths[0], 1, MPI_INT, &recv_lengths[0], 1, MPI_INT,comm);

	// Scan to get recv_disps
	std::vector<int> recv_disps = exscan(recv_lengths);

	// Do all2allv to get data on correct processor
	double * recv_data = el_vec.Buffer();
	MPI_Alltoallv(&send_data[0],&send_lengths[0],&send_disps[0],MPI_DOUBLE, \
			&recv_data[0],&recv_lengths[0],&recv_disps[0],MPI_DOUBLE,comm);

	if(print){
		std::cout << "Send data: " <<std::endl << send_data <<std::endl;
		std::cout << "Send lengths: " <<std::endl << send_lengths <<std::endl;
		std::cout << "Send disps: " <<std::endl << send_disps <<std::endl;
		std::cout << "Recv data: " <<std::endl << recv_data <<std::endl;
		std::cout << "Recv lengths: " <<std::endl << recv_lengths <<std::endl;
		std::cout << "Recv disps: " <<std::endl << recv_disps <<std::endl;
	}
}
Example #18
0
inline void outer_panel_mixed_gemm_impl_tn(
        const double alpha,
        const SpParMat<index_type, value_type, SpDCCols<index_type, value_type> > &A,
        const El::DistMatrix<value_type, col_d, El::STAR> &S,
        const double beta,
        El::DistMatrix<value_type, El::STAR, El::STAR> &C) {

    El::DistMatrix<value_type, El::STAR, El::STAR>
        tmp_C(C.Height(), C.Width());
    El::Zero(tmp_C);

    utility::combblas_slab_view_t<index_type, value_type> cbview(A, false);

    //FIXME: factor
    size_t slab_size = 2 * S.Grid().Height();
    for(size_t cur_row_idx = 0; cur_row_idx < cbview.ncols();
        cur_row_idx += slab_size) {

        size_t cur_slab_size =
            std::min(slab_size, cbview.ncols() - cur_row_idx);

        // get the next slab_size columns of B
        El::DistMatrix<value_type, El::STAR, El::STAR>
            A_row(cur_slab_size, S.Height());

        // transpose is column
        //cbview.extract_elemental_column_slab_view(A_row, cur_slab_size);
        cbview.extract_full_slab_view(cur_slab_size);

        // matrix mult (FIXME only iter nz)
        for(size_t l_row_idx = 0; l_row_idx < A_row.LocalHeight();
            ++l_row_idx) {

            size_t g_row_idx = l_row_idx * A_row.ColStride()
                               + A_row.ColShift() + cur_row_idx;

            for(size_t l_col_idx = 0; l_col_idx < A_row.LocalWidth();
                l_col_idx++) {

                //XXX: should be the same as l_col_idx
                size_t g_col_idx = l_col_idx * A_row.RowStride()
                                   + A_row.RowShift();

                // continue if we don't own values in S in this row
                if(!S.IsLocalRow(g_col_idx))
                    continue;

                //get transposed value
                value_type val = alpha * cbview(g_col_idx, g_row_idx);

                for(size_t s_col_idx = 0; s_col_idx < S.LocalWidth();
                    s_col_idx++) {

                    tmp_C.UpdateLocal(g_row_idx, s_col_idx,
                                val * S.GetLocal(S.LocalRow(g_col_idx), s_col_idx));
                }
            }
        }
    }

    //FIXME: scaling
    if(A.getcommgrid()->GetRank() == 0) {
        for(size_t col_idx = 0; col_idx < C.Width(); col_idx++)
            for(size_t row_idx = 0; row_idx < C.Height(); row_idx++)
                tmp_C.UpdateLocal(row_idx, col_idx,
                        beta * C.GetLocal(row_idx, col_idx));
    }

    //FIXME: Use utility getter
    boost::mpi::communicator world(
            A.getcommgrid()->GetWorld(), boost::mpi::comm_duplicate);
    boost::mpi::all_reduce (world,
                        tmp_C.LockedBuffer(),
                        C.Height() * C.Width(),
                        C.Buffer(),
                        std::plus<value_type>());
}
Example #19
0
void L1DistanceMatrixTU(El::UpperOrLower uplo,
    direction_t dirA, direction_t dirB, T alpha,
    const El::DistMatrix<T, El::STAR, El::MC> &A,
    const El::DistMatrix<T, El::STAR, El::MR> &B,
    T beta, El::DistMatrix<T> &C) {

    // TODO verify sizes

    const T *a = A.LockedBuffer();
    El::Int ldA = A.LDim();

    const T *b = B.LockedBuffer();
    El::Int ldB = B.LDim();

    T *c = C.Buffer();
    El::Int ldC = C.LDim();

    El::Int d = A.Height();

    /* Not the most efficient way... but mimicking BLAS is too much work! */
    if (dirA == base::COLUMNS && dirB == base::COLUMNS) {
        El::Int n = C.LocalWidth();
        El::Int m = C.LocalHeight();
        for (El::Int j = 0; j < n; j++)
            for(El::Int i =
                    ((uplo == El::UPPER) ? 0 : C.LocalRowOffset(A.GlobalCol(j)));
                i < ((uplo == El::UPPER) ? C.LocalRowOffset(A.GlobalCol(j) + 1) : m); i++) {

                T v = 0.0;
                for (El::Int k = 0; k < d; k++)
                    v += std::abs(b[j * ldB + k] - a[i * ldA + k]);
                c[j * ldC + i] = beta * c[j * ldC + i] + alpha * v;
            }

    }

    // TODO the rest of the cases.
}
Example #20
0
inline void inner_panel_mixed_gemm_impl_nn(
        const double alpha,
        const SpParMat<index_type, value_type, SpDCCols<index_type, value_type> > &A,
        const El::DistMatrix<value_type, El::STAR, El::STAR> &S,
        const double beta,
        El::DistMatrix<value_type, col_d, El::STAR> &C) {

    int n_proc_side   = A.getcommgrid()->GetGridRows();
    int output_width  = S.Width();
    int output_height = A.getnrow();

    size_t rank = A.getcommgrid()->GetRank();
    size_t cb_row_offset = utility::cb_my_row_offset(A);

    typedef SpDCCols< index_type, value_type > col_t;
    typedef SpParMat< index_type, value_type, col_t > matrix_type;
    matrix_type &_A = const_cast<matrix_type&>(A);
    col_t &data = _A.seq();

    // 1) compute the local values still using the CombBLAS distribution (2D
    //    processor grid). We assume the result is dense.
    std::vector<double> local_matrix;
    mixed_gemm_local_part_nn(alpha, A, S, 0.0, local_matrix);

    // 2) reduce first along rows so that each processor owns the values in
    //    the output row of the SOMETHING/* matrix and values for processors in
    //    the same processor column.
    boost::mpi::communicator my_row_comm(
            A.getcommgrid()->GetRowWorld(), boost::mpi::comm_duplicate);

    // storage for other procs in same row communicator: rank -> (row, values)
    typedef std::vector<std::pair<int, std::vector<double> > > for_rank_t;
    std::vector<for_rank_t> for_rank(n_proc_side);

    for(size_t local_row = 0; local_row < data.getnrow(); ++local_row) {

        size_t row = local_row + cb_row_offset;

        // the owner for VR/* and VC/* matrices is independent of the column
        size_t target_proc = utility::owner(C, row, static_cast<size_t>(0));

        // if the target processor is not in the current row communicator, get
        // the value in the processor grid sharing the same row.
        if(!A.getcommgrid()->OnSameProcRow(target_proc))
            target_proc = static_cast<int>(rank / n_proc_side) *
                            n_proc_side + target_proc % n_proc_side;

        size_t target_row_rank = A.getcommgrid()->GetRankInProcRow(target_proc);

        // reduce partial row (FIXME: if the resulting matrix is still
        // expected to be sparse, change this to communicate only nnz).
        // Working on local_width columns concurrently per column processing
        // group.
        size_t local_width = S.Width();
        const value_type* buffer = &local_matrix[local_row * local_width];
        std::vector<value_type> new_values(local_width);
        boost::mpi::reduce(my_row_comm, buffer, local_width,
                &new_values[0], std::plus<value_type>(), target_row_rank);

        // processor stores result directly if it is the owning rank of that
        // row, save for subsequent communication along rows otherwise
        if(rank == utility::owner(C, row, static_cast<size_t>(0))) {
            int elem_lrow = C.LocalRow(row);
            for(size_t idx = 0; idx < local_width; ++idx) {
                int elem_lcol = C.LocalCol(idx);
                C.SetLocal(elem_lrow, elem_lcol,
                    new_values[idx] + beta * C.GetLocal(elem_lrow, elem_lcol));
            }
        } else if (rank == target_proc) {
            // store for later comm across rows
            for_rank[utility::owner(C, row, static_cast<size_t>(0)) / n_proc_side].push_back(
                    std::make_pair(row, new_values));
        }
    }

    // 3) gather remaining values along rows: we exchange all the values with
    //    other processors in the same communicator row and then add them to
    //    our local part.
    boost::mpi::communicator my_col_comm(
            A.getcommgrid()->GetColWorld(), boost::mpi::comm_duplicate);

    std::vector<for_rank_t> new_values;
    for(int i = 0; i < n_proc_side; ++i)
        boost::mpi::gather(my_col_comm, for_rank[i], new_values, i);

    // insert new values
    for(size_t proc = 0; proc < new_values.size(); ++proc) {
        const for_rank_t &cur  = new_values[proc];

        for(size_t i = 0; i < cur.size(); ++i) {
            int elem_lrow = C.LocalRow(cur[i].first);
            for(size_t j = 0; j < cur[i].second.size(); ++j) {
                size_t elem_lcol = C.LocalCol(j);
                C.SetLocal(elem_lrow, elem_lcol,
                        cur[i].second[j] + beta *
                        C.GetLocal(elem_lrow, elem_lcol));
            }
        }
    }
}
Example #21
0
int Height(const El::DistMatrix<T, U, V>& A) {
    return A.Height();
}
index_type owner(const El::DistMatrix<value_type, El::VC, El::STAR> &A,
                 const index_type row_idx, const index_type col_idx) {

    return row_idx % (A.Grid().Height() * A.Grid().Width());
}
Example #23
0
mpi::communicator get_communicator(const El::DistMatrix<T, U, V>& A,
    mpi::comm_create_kind kind = mpi::comm_attach) {
    return mpi::communicator(A.DistComm().comm, kind);
}