Example #1
void l2r_rank_fun::grad(double *w, double *g)
{
	int i;
	int l=prob->l;
	double *lg = new double[l];
	double *tmp_vector = new double[l];
	double *gtmp_vector = new double[global_l];

#pragma omp parallel for default(shared) private(i)
	for (i=0;i<l;i++)
	{
		tmp_vector[i] = ATAQb[i] - ATe[i];
	}

	MPI_Allgatherv((void*)tmp_vector, l, MPI_DOUBLE, (void*)gtmp_vector, local_l, start_ptr, MPI_DOUBLE, MPI_COMM_WORLD);
	Qv(gtmp_vector, lg);
	MPI_Allgatherv((void*)lg, l, MPI_DOUBLE, (void*)g, local_l, start_ptr, MPI_DOUBLE, MPI_COMM_WORLD);

#pragma omp parallel for default(shared) private(i)
	for(i=0;i<global_l;i++)
	{
		g[i] = gz[i] + 2*C*g[i];
	}

	delete[] tmp_vector;
	delete[] gtmp_vector;
	delete[] lg;
}
Example #2
static void sharp_make_mpi_info (MPI_Comm comm, const sharp_job *job,
  sharp_mpi_info *minfo)
  {
  minfo->comm = comm;
  MPI_Comm_size (comm, &minfo->ntasks);
  MPI_Comm_rank (comm, &minfo->mytask);

  minfo->nm=RALLOC(int,minfo->ntasks);
  MPI_Allgather ((int *)(&job->ainfo->nm),1,MPI_INT,minfo->nm,1,MPI_INT,comm);
  minfo->ofs_m=RALLOC(int,minfo->ntasks+1);
  minfo->ofs_m[0]=0;
  for (int i=1; i<=minfo->ntasks; ++i)
    minfo->ofs_m[i] = minfo->ofs_m[i-1]+minfo->nm[i-1];
  minfo->nmtotal=minfo->ofs_m[minfo->ntasks];
  minfo->mval=RALLOC(int,minfo->nmtotal);
  MPI_Allgatherv(job->ainfo->mval, job->ainfo->nm, MPI_INT, minfo->mval,
    minfo->nm, minfo->ofs_m, MPI_INT, comm);

  minfo->mmax=sharp_get_mmax(minfo->mval,minfo->nmtotal);

  minfo->npair=RALLOC(int,minfo->ntasks);
  MPI_Allgather ((int *)(&job->ginfo->npairs), 1, MPI_INT, minfo->npair, 1,
    MPI_INT, comm);
  minfo->ofs_pair=RALLOC(int,minfo->ntasks+1);
  minfo->ofs_pair[0]=0;
  for (int i=1; i<=minfo->ntasks; ++i)
    minfo->ofs_pair[i] = minfo->ofs_pair[i-1]+minfo->npair[i-1];
  minfo->npairtotal=minfo->ofs_pair[minfo->ntasks];

  double *theta_tmp=RALLOC(double,job->ginfo->npairs);
  int *ispair_tmp=RALLOC(int,job->ginfo->npairs);
  for (int i=0; i<job->ginfo->npairs; ++i)
    {
    theta_tmp[i]=job->ginfo->pair[i].r1.theta;
    ispair_tmp[i]=job->ginfo->pair[i].r2.nph>0;
    }
  minfo->theta=RALLOC(double,minfo->npairtotal);
  minfo->ispair=RALLOC(int,minfo->npairtotal);
  MPI_Allgatherv(theta_tmp, job->ginfo->npairs, MPI_DOUBLE, minfo->theta,
    minfo->npair, minfo->ofs_pair, MPI_DOUBLE, comm);
  MPI_Allgatherv(ispair_tmp, job->ginfo->npairs, MPI_INT, minfo->ispair,
    minfo->npair, minfo->ofs_pair, MPI_INT, comm);
  DEALLOC(theta_tmp);
  DEALLOC(ispair_tmp);

  minfo->nph=2*job->nmaps*job->ntrans;

  minfo->almcount=RALLOC(int,minfo->ntasks);
  minfo->almdisp=RALLOC(int,minfo->ntasks+1);
  minfo->mapcount=RALLOC(int,minfo->ntasks);
  minfo->mapdisp=RALLOC(int,minfo->ntasks+1);
  minfo->almdisp[0]=minfo->mapdisp[0]=0;
  for (int i=0; i<minfo->ntasks; ++i)
    {
    minfo->almcount[i] = 2*minfo->nph*minfo->nm[minfo->mytask]*minfo->npair[i];
    minfo->almdisp[i+1] = minfo->almdisp[i]+minfo->almcount[i];
    minfo->mapcount[i] = 2*minfo->nph*minfo->nm[i]*minfo->npair[minfo->mytask];
    minfo->mapdisp[i+1] = minfo->mapdisp[i]+minfo->mapcount[i];
    }
  }
Example #3
   void invoke() {

    if (!has_contiguous_data(lhs)) TRIQS_RUNTIME_ERROR << "mpi gather of array into a non contiguous view";

    auto c = laz.c;
    auto recvcounts = std::vector<int>(c.size());
    auto displs = std::vector<int>(c.size() + 1, 0);
    int sendcount = laz.ref.domain().number_of_elements();
    auto D = mpi::mpi_datatype<typename A::value_type>();

    auto d = laz.domain();
    if (laz.all || (laz.c.rank() == laz.root)) resize_or_check_if_view(lhs, d.lengths());

    void *lhs_p = lhs.data_start();
    const void *rhs_p = laz.ref.data_start();

    auto mpi_ty = mpi::mpi_datatype<int>();
    if (!laz.all)
     MPI_Gather(&sendcount, 1, mpi_ty, &recvcounts[0], 1, mpi_ty, laz.root, c.get());
    else
     MPI_Allgather(&sendcount, 1, mpi_ty, &recvcounts[0], 1, mpi_ty, c.get());

    for (int r = 0; r < c.size(); ++r) displs[r + 1] = recvcounts[r] + displs[r];

    if (!laz.all)
     MPI_Gatherv((void *)rhs_p, sendcount, D, lhs_p, &recvcounts[0], &displs[0], D, laz.root, c.get());
    else
     MPI_Allgatherv((void *)rhs_p, sendcount, D, lhs_p, &recvcounts[0], &displs[0], D, c.get());
   }
Example #4
void PETScVector::gatherLocalVectors( PetscScalar local_array[],
                                      PetscScalar global_array[])
{
    // Collect vectors from processors.
    int size_rank;
    MPI_Comm_size(PETSC_COMM_WORLD, &size_rank);

    // number of elements to be sent for each rank
    std::vector<PetscInt>  i_cnt(size_rank);

    MPI_Allgather(&_size_loc, 1, MPI_INT, &i_cnt[0], 1, MPI_INT, PETSC_COMM_WORLD);

    // collect local array
    PetscInt offset = 0;
    // offset in the receive vector of the data from each rank
    std::vector<PetscInt>  i_disp(size_rank);
    for(PetscInt i=0; i<size_rank; i++)
    {
        i_disp[i] = offset;
        offset += i_cnt[i];
    }

    MPI_Allgatherv(local_array, _size_loc, MPI_DOUBLE,
                   global_array, &i_cnt[0], &i_disp[0], MPI_DOUBLE, PETSC_COMM_WORLD);

}
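Example #4 above shows the pattern that most snippets in this collection share: exchange the per-rank element counts with MPI_Allgather, build the displacement array as an exclusive prefix sum, then call MPI_Allgatherv with those counts and displacements. Below is a minimal, self-contained sketch of that pattern; the function name gather_all_doubles and the fixed MPI_DOUBLE payload are illustrative assumptions, not taken from any of the projects quoted here.

#include <mpi.h>
#include <vector>

// Gather a variable-length block of doubles from every rank so that each
// rank ends up with the full concatenated vector.
std::vector<double> gather_all_doubles(const std::vector<double>& local, MPI_Comm comm)
{
    int nranks = 0;
    MPI_Comm_size(comm, &nranks);

    // Step 1: every rank learns how many elements the others contribute.
    int my_count = static_cast<int>(local.size());
    std::vector<int> counts(nranks);
    MPI_Allgather(&my_count, 1, MPI_INT, counts.data(), 1, MPI_INT, comm);

    // Step 2: displacements are the exclusive prefix sum of the counts.
    std::vector<int> displs(nranks, 0);
    for (int r = 1; r < nranks; ++r)
        displs[r] = displs[r - 1] + counts[r - 1];

    // Step 3: gather the actual data.
    std::vector<double> global(displs.back() + counts.back());
    MPI_Allgatherv(local.data(), my_count, MPI_DOUBLE,
                   global.data(), counts.data(), displs.data(), MPI_DOUBLE, comm);
    return global;
}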
Example #5
    static void all_gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out)
    {
      std::vector<int>  counts(comm.size());
      Collectives<int,void*>::all_gather(comm, (int) in.size(), counts);

      std::vector<int>  offsets(comm.size(), 0);
      for (unsigned i = 1; i < offsets.size(); ++i)
        offsets[i] = offsets[i-1] + counts[i-1];

      std::vector<T> buffer(offsets.back() + counts.back());
      MPI_Allgatherv(Datatype::address(const_cast<T&>(in[0])),
                     in.size(),
                     Datatype::datatype(),
                     Datatype::address(buffer[0]),
                     &counts[0],
                     &offsets[0],
                     Datatype::datatype(),
                     comm);

      out.resize(comm.size());
      size_t cur = 0;
      for (int i = 0; i < comm.size(); ++i)
      {
          out[i].reserve(counts[i]);
          for (int j = 0; j < counts[i]; ++j)
              out[i].push_back(buffer[cur++]);
      }
    }
Example #6
int idft(float complex *dst, float complex* src, float complex* w1, float complex* w2, float complex* tmp, int N, int M, int start, int end, int* cnt, int* disp, MPI_Datatype mpi_complexf, int rank) {

	int k, l, m, n;

	for (n = start; n<end; n++) {
		int nMl = n*M;
		for (l=0; l<M; l++) {
			int lm = 0;
			int nMm = n*M;
			tmp[nMl] = 0.0;
			for (m = 0; m<M; m++) {
				tmp[nMl] += src[nMm]/w1[lm]/M;
				nMm ++;
				lm += l;
			}
			nMl ++;
		}
	}
	MPI_Allgatherv(tmp + disp[rank], cnt[rank], mpi_complexf, tmp, cnt, disp, mpi_complexf, MPI_COMM_WORLD);
	for (k=start; k<end; k++) {
		int nMl = 0;
		
		for (n = 0; n<N; n++) {
			int kMl = k*M;
			for (l=0; l<M; l++) {
				if (n == 0)
					dst[kMl] = 0.0;
				dst[kMl] += tmp[n*M+l]/w2[n*k]/N;
				kMl ++;
			}
		}
	}
	return 0;
}
Example #7
static PetscErrorCode ISGatherTotal_Private(IS is)
{
  PetscErrorCode ierr;
  PetscInt       i,n,N;
  const PetscInt *lindices;
  MPI_Comm       comm;
  PetscMPIInt    rank,size,*sizes = NULL,*offsets = NULL,nn;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(is,IS_CLASSID,1);

  ierr = PetscObjectGetComm((PetscObject)is,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = ISGetLocalSize(is,&n);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,PetscMPIInt,&sizes,size,PetscMPIInt,&offsets);CHKERRQ(ierr);

  ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
  ierr = MPI_Allgather(&nn,1,MPI_INT,sizes,1,MPI_INT,comm);CHKERRQ(ierr);
  offsets[0] = 0;
  for (i=1; i<size; ++i) offsets[i] = offsets[i-1] + sizes[i-1];
  N = offsets[size-1] + sizes[size-1];

  ierr = PetscMalloc(N*sizeof(PetscInt),&(is->total));CHKERRQ(ierr);
  ierr = ISGetIndices(is,&lindices);CHKERRQ(ierr);
  ierr = MPI_Allgatherv((void*)lindices,nn,MPIU_INT,is->total,sizes,offsets,MPIU_INT,comm);CHKERRQ(ierr);
  ierr = ISRestoreIndices(is,&lindices);CHKERRQ(ierr);
  is->local_offset = offsets[rank];
  ierr = PetscFree2(sizes,offsets);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #8
void mpla_copy_distributed_vector_to_cpu(double* x_cpu, struct mpla_vector* x, struct mpla_instance* instance)
{
        // create sub-communicator for each process column
        int remain_dims[2];
        remain_dims[0]=1;
        remain_dims[1]=0;
        MPI_Comm column_comm;
        MPI_Cart_sub(instance->comm, remain_dims, &column_comm);
        int column_rank;
        MPI_Comm_rank(column_comm, &column_rank);

        // columnwise creation of the full vector
        double* full_vector = x_cpu;
        int* recvcounts = new int[instance->proc_rows];
        int* displs = new int[instance->proc_rows];
        for (int i=0; i<instance->proc_rows; i++)
        {
                recvcounts[i] = x->proc_row_count[i][instance->cur_proc_col];
                displs[i] = x->proc_row_offset[i][instance->cur_proc_col];
        }
//        cudaMalloc((void**)&full_vector, sizeof(double)*x->vec_row_count);
//        cudaThreadSynchronize();
//        checkCUDAError("cudaMalloc");
        MPI_Allgatherv(x->data, x->cur_proc_row_count, MPI_DOUBLE, full_vector, recvcounts, displs, MPI_DOUBLE, column_comm);

        // memory cleanup
        delete[] recvcounts;
        delete[] displs;
        MPI_Comm_free(&column_comm);

        MPI_Barrier(instance->comm);
}
Example #9
void ompi_allgatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype,
		      char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs,
		      MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *ierr)
{
    MPI_Comm c_comm;
    MPI_Datatype c_sendtype, c_recvtype;
    int size, ierr_c;
    OMPI_ARRAY_NAME_DECL(recvcounts);
    OMPI_ARRAY_NAME_DECL(displs);

    c_comm = MPI_Comm_f2c(*comm);
    c_sendtype = MPI_Type_f2c(*sendtype);
    c_recvtype = MPI_Type_f2c(*recvtype);

    MPI_Comm_size(c_comm, &size);
    OMPI_ARRAY_FINT_2_INT(recvcounts, size);
    OMPI_ARRAY_FINT_2_INT(displs, size);

    sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf);
    sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf);
    recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf);

    ierr_c = MPI_Allgatherv(sendbuf,
                            OMPI_FINT_2_INT(*sendcount),
                            c_sendtype,
                            recvbuf,
                            OMPI_ARRAY_NAME_CONVERT(recvcounts),
                            OMPI_ARRAY_NAME_CONVERT(displs),
                            c_recvtype, c_comm);

    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(ierr_c);

    OMPI_ARRAY_FINT_2_INT_CLEANUP(recvcounts);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(displs);
}
Example #10
HYPRE_Int
hypre_MPI_Allgatherv( void               *sendbuf,
                      HYPRE_Int           sendcount,
                      hypre_MPI_Datatype  sendtype,
                      void               *recvbuf,
                      HYPRE_Int          *recvcounts,
                      HYPRE_Int          *displs, 
                      hypre_MPI_Datatype  recvtype,
                      hypre_MPI_Comm      comm ) 
{
   hypre_int *mpi_recvcounts, *mpi_displs, csize;
   HYPRE_Int  i;
   HYPRE_Int  ierr;

   MPI_Comm_size(comm, &csize);
   mpi_recvcounts = hypre_TAlloc(hypre_int, csize);
   mpi_displs = hypre_TAlloc(hypre_int, csize);
   for (i = 0; i < csize; i++)
   {
      mpi_recvcounts[i] = (hypre_int) recvcounts[i];
      mpi_displs[i] = (hypre_int) displs[i];
   }
   ierr = (HYPRE_Int) MPI_Allgatherv(sendbuf, (hypre_int)sendcount, sendtype,
                                     recvbuf, mpi_recvcounts, mpi_displs, 
                                     recvtype, comm);
   hypre_TFree(mpi_recvcounts);
   hypre_TFree(mpi_displs);

   return ierr;
}
Example #11
void PS_get_diag(ParaSparse *A, double *d)
{
	int n;
	
	int *nas, *nds;
	double *diag_loc;
	
	nas = (int *) malloc(A->size * sizeof(int));
	nds = (int *) malloc(A->size * sizeof(int));
	diag_loc = (double *) malloc(A->nd * sizeof(double));
	
	for(n = 0; n < A->nd; n++)
		diag_loc[n] = 0; //TODO: memset?
	
	for(n = 0; n < A->Ne; n++)
		if(A->i[n] == A->j[n])
			diag_loc[A->i[n] - A->na] += A->Mij[n];
	
	MPI_Allgather(&(A->na), 1, MPI_INT, nas, 1, MPI_INT, A->comm);
	MPI_Allgather(&(A->nd), 1, MPI_INT, nds, 1, MPI_INT, A->comm);
	
	MPI_Allgatherv(diag_loc, A->nd, MPI_DOUBLE,
				   d, nds, nas, MPI_DOUBLE, A->comm);
	
	free(nas);
	free(nds);
	free(diag_loc);
}
Example #12
/*
 * Routine to communicate eigenvalues such that every process has
 * all computed eigenvalues (iu-il+1) in W; this routine is designed 
 * to be called right after 'pmrrr'.
 */
int PMR_comm_eigvals(MPI_Comm comm, int *nz, int *myfirstp, double *W)
{
  MPI_Comm comm_dup;
  MPI_Comm_dup(comm, &comm_dup);
  int nproc;
  MPI_Comm_size(comm_dup, &nproc);

  int *rcount = (int*)malloc(nproc*sizeof(int)); assert(rcount!=NULL);
  int *rdispl = (int*)malloc(nproc*sizeof(int)); assert(rdispl!=NULL);
  double *work = (double*)malloc((*nz+1)*sizeof(double)); assert(work!=NULL);

  if (*nz > 0)
    memcpy(work, W, (*nz)*sizeof(double) );

  MPI_Allgather(nz, 1, MPI_INT, rcount, 1, MPI_INT, comm_dup);

  MPI_Allgather(myfirstp, 1, MPI_INT, rdispl, 1, MPI_INT, comm_dup);
  
  MPI_Allgatherv
  (work, *nz, MPI_DOUBLE, W, rcount, rdispl, MPI_DOUBLE, comm_dup);

  MPI_Comm_free(&comm_dup);
  free(rcount);
  free(rdispl);
  free(work);

  return 0;
}
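As the comment notes, this helper is meant to run right after pmrrr. A hedged sketch of such a calling context follows, assuming nz, myfirstp and the first *nz entries of W have already been filled by a preceding pmrrr call on each rank; that call is elided because its argument list is not shown in this listing, and gather_spectrum is an illustrative name.

#include <mpi.h>
#include <cstdlib>

// Declaration matching the example above.
int PMR_comm_eigvals(MPI_Comm comm, int *nz, int *myfirstp, double *W);

void gather_spectrum(MPI_Comm comm, int n_eigenvalues)
{
    // Placeholders: in real code nz, myfirstp and W[0..nz-1] are outputs of pmrrr.
    int nz = 0;
    int myfirstp = 0;
    double *W = (double *)std::malloc(n_eigenvalues * sizeof(double));

    /* ... pmrrr(...) would run here and fill nz, myfirstp and W ... */

    // Afterwards every rank holds the complete spectrum in W.
    PMR_comm_eigvals(comm, &nz, &myfirstp, W);

    std::free(W);
}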
Example #13
File: BLAS3.c Project: sclc/cacg
void spmm_csr_info_data_sep_BCBCG(csrType_local csr_mat, denseType dense_mat_info, double * dataSrc, int dataDisp
        , denseType *res_mat, int myid, int numprocs) {

    int ierr;
    int idx;
    // gather all data from all processes
    int recv_count[numprocs];
    int displs[numprocs];

    int local_num_row_normal = dense_mat_info.global_num_row / numprocs;
    int local_num_col_normal = dense_mat_info.local_num_col;
    int normal_num_elements = local_num_row_normal * local_num_col_normal;

    // recvBuf
    double * recvBuf = (double*)calloc( dense_mat_info.global_num_col * dense_mat_info.global_num_row, sizeof(double));
    // values allocated by calloc() are initialized to zero
    double *res_buffer = (double *) calloc(res_mat->local_num_col * res_mat->local_num_row, sizeof (double));

    for (idx = 0; idx < numprocs; idx++) {
        recv_count[idx] = normal_num_elements;
        displs[idx] = idx * normal_num_elements;

        if (idx == (numprocs - 1)) {
            recv_count[idx] = (dense_mat_info.global_num_row - local_num_row_normal * (numprocs - 1))
                    * local_num_col_normal;
        }
    }
 
    ierr = MPI_Allgatherv((void *) (dataSrc+dataDisp), dense_mat_info.local_num_col * dense_mat_info.local_num_row, MPI_DOUBLE
            , recvBuf, recv_count, displs
            , MPI_DOUBLE, MPI_COMM_WORLD);


    // spmv using csr format
    int idx_row;
    for (idx_row = 0; idx_row < csr_mat.num_rows; idx_row++) {
        int row_start_idx = csr_mat.row_start[idx_row];
        int row_end_idx = csr_mat.row_start[idx_row + 1];

        int idx_data;
        for (idx_data = row_start_idx; idx_data < row_end_idx; idx_data++) {
            int col_idx = csr_mat.col_idx[idx_data];
            double csr_data = csr_mat.csrdata[idx_data];
            int block_size = dense_mat_info.global_num_col;
            int block_idx;
            for (block_idx = 0; block_idx < block_size; block_idx++) {
                res_buffer[idx_row * res_mat->local_num_col + block_idx] +=
                        csr_data * recvBuf[col_idx * dense_mat_info.global_num_col + block_idx];
            }
        }
    }
    // Data zone changes
    if (res_mat->data != 0) {
        free(res_mat->data);
    } else {
        exit(0);
    }
    res_mat->data = res_buffer;

    free(recvBuf);
}
Example #14
File: BLAS3.c Project: sclc/cacg
void spmm_csr_v2(csrType_local csr_mat, denseType dense_mat, denseType *res_mat, int myid, int numprocs) {
    int ierr;
    int idx;
    // gather all data from all processes
    int recv_count[numprocs];
    int displs[numprocs];

    int local_num_row_normal = dense_mat.global_num_row / numprocs;
    int local_num_col_normal = dense_mat.global_num_col;
    int normal_num_elements = local_num_row_normal * local_num_col_normal;

    double *recv_buffer = (double*)calloc(dense_mat.global_num_col * dense_mat.global_num_row, sizeof(double));
    // values allocated by calloc() are initialized to zero
    double *res_buffer = (double *) calloc(res_mat->local_num_col * res_mat->local_num_row, sizeof (double));

    for (idx = 0; idx < numprocs; idx++) {
        recv_count[idx] = normal_num_elements;
        displs[idx] = idx * normal_num_elements;

        if (idx == (numprocs - 1)) {
            recv_count[idx] = (dense_mat.global_num_row - local_num_row_normal * (numprocs - 1))
                    * local_num_col_normal;
        }
    }
    //    ierr = MPI_Barrier(MPI_COMM_WORLD);
    ierr = MPI_Allgatherv((void *) dense_mat.data, dense_mat.local_num_col * dense_mat.local_num_row, MPI_DOUBLE
            , recv_buffer, recv_count, displs
            , MPI_DOUBLE, MPI_COMM_WORLD);

    // spmm using csr format
    int idx_row;
#ifdef SPMM_CAL_DEBUG_2
    printf("in BLAS3.c, myid=%d,number of row: %d\n", myid, csr_mat.num_rows);
#endif
    for (idx_row = 0; idx_row < csr_mat.num_rows; idx_row++) {
        int row_start_idx = csr_mat.row_start[idx_row];
        int row_end_idx = csr_mat.row_start[idx_row + 1];

        int idx_data;
        for (idx_data = row_start_idx; idx_data < row_end_idx; idx_data++) {
            int col_idx = csr_mat.col_idx[idx_data];
            double csr_data = csr_mat.csrdata[idx_data];
            int block_size = dense_mat.local_num_col;
            int block_idx;
            for (block_idx = 0; block_idx < block_size; block_idx++) {
                res_buffer[idx_row * res_mat->local_num_col + block_idx] +=
                        csr_data * recv_buffer[col_idx * dense_mat.local_num_col + block_idx];
            }
        }
    }
    if (res_mat->data != 0) {
        free(res_mat->data);
    } else {
        exit(0);
    }
    res_mat->data = res_buffer;
    
    free(recv_buffer);

}
Example #15
int
hypre_thread_MPI_Allgatherv( void        *sendbuf,
                int          sendcount,
                MPI_Datatype sendtype,
                void        *recvbuf,
                int         *recvcounts,
                int         *displs, 
                MPI_Datatype recvtype,
                MPI_Comm     comm       ) 
{ 
  int i,returnval;
  int unthreaded = pthread_equal(initial_thread,pthread_self());
  int I_call_mpi = unthreaded || pthread_equal(hypre_thread[0],pthread_self());
  hypre_barrier(&mpi_mtx, unthreaded);
  if (I_call_mpi)
  {
    returnval=MPI_Allgatherv(sendbuf,sendcount,sendtype,recvbuf,recvcounts,
			     displs,recvtype,comm);
  }
  else
  {
     returnval=0;
  }
  hypre_barrier(&mpi_mtx, unthreaded);
  return returnval;
}
Example #16
double time_allgatherv(struct collParams* p)
{
    int i, size2;
    int disp = 0;
    for ( i = 0; i < p->nranks; i++) {
        size2 = i % (p->size+1);
        recvcounts[i] = size2;
        rdispls[i] = disp;
        disp += size2;
    }
    MPI_Barrier(MPI_COMM_WORLD);

    size2 = p->myrank % (p->size+1);
    __TIME_START__;
    for (i = 0; i < p->iter; i++) {
        MPI_Allgatherv(sbuffer, size2, p->type, rbuffer, recvcounts, rdispls, p->type, p->comm);
        __BAR__(p->comm);
    }
    __TIME_END__;

    if (check_buffers) {
        check_sbuffer(p->myrank);
        for (i = 0; i < p->nranks; i++) {
            check_rbuffer(rbuffer, rdispls[i], i, 0, recvcounts[i]);
        }
    }

    return __TIME_USECS__ / (double)p->iter;
}
Example #17
int main(int argc, char *argv[])
{
  int rank, size;
  int i;
  int *sb;
  int *rb;
  int *recv_counts;
  int *recv_disps;
  int recv_sb_size;
  int status;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  recv_counts = (int *) xbt_malloc(size * sizeof(int));
  recv_disps = (int *) xbt_malloc(size * sizeof(int));
  
  recv_sb_size = 0;
  for (i = 0; i < size; i++) {
    recv_counts[i] = i + 1;
    recv_disps[i] = recv_sb_size;    
    recv_sb_size += i + 1;
  }

  sb = (int *) xbt_malloc(recv_counts[rank] * sizeof(int));
  rb = (int *) xbt_malloc(recv_sb_size * sizeof(int));

  for (i = 0; i < recv_counts[rank]; ++i)
    sb[i] = recv_disps[rank] + i;
  for (i = 0; i < recv_sb_size; ++i)  
    rb[i] = -1;

  printf("[%d] sndbuf=[", rank);
  for (i = 0; i < recv_counts[rank]; i++)
    printf("%d ", sb[i]);
  printf("]\n");

  status = MPI_Allgatherv(sb, recv_counts[rank], MPI_INT, rb, recv_counts, recv_disps, MPI_INT, MPI_COMM_WORLD);

  printf("[%d] rcvbuf=[", rank);
  for (i = 0; i < recv_sb_size; i++)
    printf("%d ", rb[i]);
  printf("]\n");


  if (rank == 0) {
    if (status != MPI_SUCCESS) {
      printf("allgatherv returned %d\n", status);
      fflush(stdout);
    }
  }
  free(sb);
  free(rb);
  free(recv_counts);
  free(recv_disps);
  MPI_Finalize();
  return (EXIT_SUCCESS);
}
Example #18
void allgatherv<spike::spike_interface_stats_collector>(spike::spike_interface_stats_collector& d, MPI_Datatype spike){
    double t0,t1;
    t0 = MPI_Wtime();
    MPI_Allgatherv(&(d.spikeout_[0]), d.spikeout_.size(), spike,
        &(d.spikein_[0]), &(d.nin_[0]), &(d.displ_[0]), spike, MPI_COMM_WORLD);
    t1 = MPI_Wtime();
    d.allgather_v_times_.push_back(std::make_pair(d.spikeout_.size(), t1-t0));
}
Example #19
 void allgatherv(MPI_Comm comm, double *send_buf, int send_count,
                 double *rec_buf, int *rec_counts, int *displacements)
 {
   int error = MPI_Allgatherv(send_buf, send_count, MPI_DOUBLE,
                              rec_buf, rec_counts, displacements, MPI_DOUBLE,
                              comm);
   if (error != MPI_SUCCESS) throw ATC_Error("error in allgatherv "+to_string(error));
 }
Example #20
void mpi_allgatherv (void *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, 
		     void *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, 
		     MPI_Fint *recvtype, MPI_Fint *comm, MPI_Fint *__ierr)
{
  *__ierr = MPI_Allgatherv (sendbuf, *sendcount, MPI_Type_f2c (*sendtype),
			    recvbuf, recvcounts, displs, 
			    MPI_Type_f2c (*recvtype), MPI_Comm_f2c (*comm));
}
Example #21
FC_FUNC( mpi_allgatherv , MPI_ALLGATHERV )
                          ( void *sendbuf, int *sendcount, int *sendtype,
			    void *recvbuf, int *recvcounts, int *displs,
                            int *recvtype, int *comm, int *ierror)
{
  *ierror=MPI_Allgatherv( mpi_c_in_place(sendbuf), *sendcount, *sendtype,
			  recvbuf, recvcounts, displs,
                          *recvtype, *comm );
}
Example #22
int Allgatherv(MPI_Comm comm,
               std::vector<T>& sendbuf,
               std::vector<int>& recvLengths,
               std::vector<T>& recvbuf)
{
#ifdef FEI_SER
  //If we're in serial mode, just copy sendbuf to recvbuf and return.

  recvbuf = sendbuf;
  recvLengths.resize(1);
  recvLengths[0] = sendbuf.size();
#else
  int numProcs = 1;
  MPI_Comm_size(comm, &numProcs);

  try {

  MPI_Datatype mpi_dtype = fei::mpiTraits<T>::mpi_type();

  std::vector<int> tmpInt(numProcs, 0);

  int len = sendbuf.size();
  int* tmpBuf = &tmpInt[0];

  recvLengths.resize(numProcs);
  int* recvLenPtr = &recvLengths[0];

  CHK_MPI( MPI_Allgather(&len, 1, MPI_INT, recvLenPtr, 1, MPI_INT, comm) );

  int displ = 0;
  for(int i=0; i<numProcs; i++) {
    tmpBuf[i] = displ;
    displ += recvLenPtr[i];
  }

  if (displ == 0) {
    recvbuf.resize(0);
    return(0);
  }

  recvbuf.resize(displ);

  T* sendbufPtr = sendbuf.size()>0 ? &sendbuf[0] : NULL;
  
  CHK_MPI( MPI_Allgatherv(sendbufPtr, len, mpi_dtype,
			&recvbuf[0], &recvLengths[0], tmpBuf,
			mpi_dtype, comm) );

  }
  catch(std::runtime_error& exc) {
    fei::console_out() << exc.what() << FEI_ENDL;
    return(-1);
  }
#endif

  return(0);
}
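A hedged usage sketch for the template above, assuming the fei::mpiTraits specialisation for double and an initialised MPI environment are available as in the surrounding project; demo_fei_allgatherv is an illustrative name.

#include <mpi.h>
#include <vector>

void demo_fei_allgatherv()
{
    int my_rank = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    std::vector<double> mine(my_rank + 1, 3.14);  // per-rank contribution, size varies by rank
    std::vector<int>    lengths;                  // receives the per-rank sizes
    std::vector<double> everything;               // receives the concatenated data on every rank

    // Returns 0 on success, -1 if an exception was caught inside the template.
    int err = Allgatherv(MPI_COMM_WORLD, mine, lengths, everything);
    (void)err;
}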
Example #23
// MPI_Allreduce with Reduce_scatter and Allgatherv
inline void execute_GL_Allreduce_as_ReducescatterAllgatherv(collective_params_t* params) {

    MPI_Reduce_scatter(params->sbuf, params->tmp_buf, params->counts_array,
            params->datatype, params->op, MPI_COMM_WORLD);

    MPI_Allgatherv(params->tmp_buf, params->count, params->datatype,
            params->rbuf, params->counts_array, params->displ_array, params->datatype,
            MPI_COMM_WORLD);

}
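The pair of calls above reproduces MPI_Allreduce: MPI_Reduce_scatter leaves each rank with the fully reduced values of its own block, and MPI_Allgatherv then redistributes those blocks to every rank. Below is a minimal self-contained sketch of the same decomposition, assuming MPI_DOUBLE data, MPI_SUM, and a block split that hands the remainder to the lowest ranks; allreduce_as_rsag is an illustrative name.

#include <mpi.h>
#include <vector>

// Equivalent of MPI_Allreduce(sbuf, rbuf, count, MPI_DOUBLE, MPI_SUM, comm)
// expressed as reduce-scatter followed by allgatherv.
void allreduce_as_rsag(const double *sbuf, double *rbuf, int count, MPI_Comm comm)
{
    int rank = 0, size = 0;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    std::vector<int> counts(size), displs(size);
    int base = count / size, rem = count % size, disp = 0;
    for (int r = 0; r < size; ++r) {
        counts[r] = base + (r < rem ? 1 : 0);  // spread the remainder over the first ranks
        displs[r] = disp;
        disp += counts[r];
    }

    // Step 1: every rank obtains the fully reduced values of its own block.
    std::vector<double> tmp(counts[rank] > 0 ? counts[rank] : 1);
    MPI_Reduce_scatter((void *)sbuf, tmp.data(), counts.data(), MPI_DOUBLE, MPI_SUM, comm);

    // Step 2: gather the reduced blocks so that rbuf holds the full result everywhere.
    MPI_Allgatherv(tmp.data(), counts[rank], MPI_DOUBLE,
                   rbuf, counts.data(), displs.data(), MPI_DOUBLE, comm);
}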
Example #24
int oshmem_shmem_allgatherv(void *send_buf, void* rcv_buf, int send_count,
                            int* rcv_size, int* displs)
{
    int rc;

    rc = MPI_Allgatherv(send_buf, send_count, MPI_BYTE,
                         rcv_buf, rcv_size, displs, MPI_BYTE, oshmem_comm_world);

    return rc;
}
Example #25
int PLA_MPI_Allgatherv(		/* aka Collect */
	void *		sendbuf,
	int 		scount, 
	MPI_Datatype 	stype, 
	void *		recvbuf,
	int *		rcounts,
	int *		displs,
	MPI_Datatype 	rtype, 
	MPI_Comm 	comm)
{
    return (MPI_Allgatherv ( sendbuf, scount, stype, recvbuf, rcounts, displs, rtype, comm ) );
}
Example #26
int lis_vector_gather(LIS_VECTOR v, LIS_SCALAR value[])
{
#ifdef USE_MPI
	int err,i,j,is,n,my_rank,nprocs,*recvcounts;

	err = lis_vector_check(v,LIS_VECTOR_CHECK_NULL);
	if( err ) return err;

	my_rank = v->my_rank;
	nprocs  = v->nprocs;
	n       = v->n;
	is      = v->is;

	#ifdef USE_VEC_COMP
	#pragma cdir nodep
	#endif
	#ifdef _OPENMP
	#pragma omp parallel for private(i)
	#endif
	for(i=0; i<n; i++)
	{
	  value[i+is] = v->value[i];
	}
	recvcounts = (int *)lis_malloc( (nprocs+1)*sizeof(int),"lis_vector_gather::recvcounts" );
	for(i=0; i<nprocs; i++)
	{
	  recvcounts[i] = v->ranges[i+1] - v->ranges[i];
	}
	MPI_Allgatherv(&value[is],n,MPI_DOUBLE,&value[0],recvcounts,v->ranges,MPI_DOUBLE,v->comm);
	lis_free(recvcounts);

	return LIS_SUCCESS;
#else
	int err,i,n;

	err = lis_vector_check(v,LIS_VECTOR_CHECK_NULL);
	if( err ) return err;

	n = v->n;

	#ifdef USE_VEC_COMP
	#pragma cdir nodep
	#endif
	#ifdef _OPENMP
	#pragma omp parallel for private(i)
	#endif
	for(i=0; i<n; i++)
	{
	  value[i] = v->value[i];
	}

	return LIS_SUCCESS;
#endif
}
Example #27
int main( int argc, char **argv )
{
    double *vecout;
    MPI_Comm comm;
    int    count, minsize = 2;
    int    i, errs = 0;
    int    rank, size;
    int    *displs, *recvcounts;

    MTest_Init( &argc, &argv );

    while (MTestGetIntracommGeneral( &comm, minsize, 1 )) {
	if (comm == MPI_COMM_NULL) continue;
	/* Determine the sender and receiver */
	MPI_Comm_rank( comm, &rank );
	MPI_Comm_size( comm, &size );

	displs     = (int *)malloc( size * sizeof(int) );
	recvcounts = (int *)malloc( size * sizeof(int) );
	
        for (count = 1; count < 9000; count = count * 2) {
            vecout = (double *)malloc( size * count * sizeof(double) );
            
            for (i=0; i<count; i++) {
                vecout[rank*count+i] = rank*count+i;
            }
            for (i=0; i<size; i++) {
                recvcounts[i] = count;
                displs[i]    = i * count;
            }
            MPI_Allgatherv( MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, 
                            vecout, recvcounts, displs, MPI_DOUBLE, comm );
            for (i=0; i<count*size; i++) {
                if (vecout[i] != i) {
                    errs++;
                    if (errs < 10) {
                        fprintf( stderr, "vecout[%d]=%d\n",
                                 i, (int)vecout[i] );
                    }
                }
            }
            free( vecout );
        }

	free( displs );
	free( recvcounts );
	MTestFreeComm( &comm );
    }

    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
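The test above exercises the in-place form of the collective: when MPI_IN_PLACE is passed as the send buffer, the send count and type are ignored and each rank's contribution is read from its own section of the receive buffer. A minimal hedged sketch of that form outside the test harness; inplace_allgatherv is an illustrative name and the uniform per-rank count is an assumption.

#include <mpi.h>
#include <vector>

// In-place allgatherv of `count` doubles per rank: each rank writes its own
// block into the shared-layout buffer first, then gathers everyone else's.
void inplace_allgatherv(std::vector<double>& vec, int count, MPI_Comm comm)
{
    int rank = 0, size = 0;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    std::vector<int> counts(size, count), displs(size, 0);
    for (int r = 1; r < size; ++r)
        displs[r] = displs[r - 1] + counts[r - 1];

    // vec must already hold all blocks, with this rank's data at displs[rank];
    // sendcount and sendtype are ignored when MPI_IN_PLACE is used.
    MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                   vec.data(), counts.data(), displs.data(), MPI_DOUBLE, comm);
}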
Example #28
    /// combine (per-process) messages
    Opm::DeferredLogger gatherDeferredLogger(const Opm::DeferredLogger& local_deferredlogger)
    {

        int num_messages = local_deferredlogger.messages_.size();

        int int64_mpi_pack_size;
        MPI_Pack_size(1, MPI_INT64_T, MPI_COMM_WORLD, &int64_mpi_pack_size);
        int unsigned_int_mpi_pack_size;
        MPI_Pack_size(1, MPI_UNSIGNED, MPI_COMM_WORLD, &unsigned_int_mpi_pack_size);

        // store number of messages;
        int message_size = unsigned_int_mpi_pack_size;
        // store 1 int64 per message for flag
        message_size += num_messages*int64_mpi_pack_size;
        // store 2 unsigned ints per message for length of tag and length of text
        message_size += num_messages*2*unsigned_int_mpi_pack_size;

        for (const auto& lm : local_deferredlogger.messages_) {
            int string_mpi_pack_size;
            MPI_Pack_size(lm.tag.size(), MPI_CHAR, MPI_COMM_WORLD, &string_mpi_pack_size);
            message_size += string_mpi_pack_size;
            MPI_Pack_size(lm.text.size(), MPI_CHAR, MPI_COMM_WORLD, &string_mpi_pack_size);
            message_size += string_mpi_pack_size;
        }

        // Pack local messages.
        std::vector<char> buffer(message_size);

        int offset = 0;
        packMessages(local_deferredlogger.messages_, buffer, offset);
        assert(offset == message_size);

        // Get message sizes and create offset/displacement array for gathering.
        int num_processes = -1;
        MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
        std::vector<int> message_sizes(num_processes);
        MPI_Allgather(&message_size, 1, MPI_INT, message_sizes.data(), 1, MPI_INT, MPI_COMM_WORLD);
        std::vector<int> displ(num_processes + 1, 0);
        std::partial_sum(message_sizes.begin(), message_sizes.end(), displ.begin() + 1);

        // Gather.
        std::vector<char> recv_buffer(displ.back());
        MPI_Allgatherv(buffer.data(), buffer.size(), MPI_PACKED,
                       const_cast<char*>(recv_buffer.data()), message_sizes.data(),
                       displ.data(), MPI_PACKED,
                       MPI_COMM_WORLD);

        // Unpack.
        Opm::DeferredLogger global_deferredlogger;
        global_deferredlogger.messages_ = unpackMessages(recv_buffer, displ);
        return global_deferredlogger;
    }
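The gather above leans on packMessages and unpackMessages, which are not part of this listing. A hedged sketch of the underlying mechanism for a single length-prefixed string, using MPI_Pack and MPI_Unpack directly; pack_string and unpack_string are illustrative names, not the project's helpers.

#include <mpi.h>
#include <string>
#include <vector>

// Append one length-prefixed string to `buffer`, advancing `offset`.
void pack_string(const std::string& s, std::vector<char>& buffer, int& offset)
{
    unsigned int len = static_cast<unsigned int>(s.size());
    MPI_Pack(&len, 1, MPI_UNSIGNED,
             buffer.data(), static_cast<int>(buffer.size()), &offset, MPI_COMM_WORLD);
    MPI_Pack((void *)s.data(), static_cast<int>(len), MPI_CHAR,
             buffer.data(), static_cast<int>(buffer.size()), &offset, MPI_COMM_WORLD);
}

// Read back one length-prefixed string from `buffer`, advancing `offset`.
std::string unpack_string(const std::vector<char>& buffer, int& offset)
{
    unsigned int len = 0;
    MPI_Unpack((void *)buffer.data(), static_cast<int>(buffer.size()), &offset,
               &len, 1, MPI_UNSIGNED, MPI_COMM_WORLD);
    std::string s(len, '\0');
    MPI_Unpack((void *)buffer.data(), static_cast<int>(buffer.size()), &offset,
               &s[0], static_cast<int>(len), MPI_CHAR, MPI_COMM_WORLD);
    return s;
}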
Example #29
value caml_mpi_allgather(value sendbuf,
                         value recvbuf, value recvlengths,
                         value comm)
{
  int * recvcounts, * displs;

  caml_mpi_counts_displs(recvlengths, &recvcounts, &displs);
  MPI_Allgatherv(String_val(sendbuf), string_length(sendbuf), MPI_BYTE,
                 String_val(recvbuf), recvcounts, displs, MPI_BYTE,
                 Comm_val(comm));
  stat_free(recvcounts);
  stat_free(displs);
  return Val_unit;
}
Example #30
  RCP<typename BrickAggregationFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::container>
  BrickAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
  Construct1DMap (const RCP<const Teuchos::Comm<int> >& comm,
                  const ArrayRCP<const double>& x) const
  {
    int n = x.size();

    // Step 1: Create a local vector with unique coordinate points
    RCP<container> gMap = rcp(new container);
    for (int i = 0; i < n; i++)
      (*gMap)[x[i]] = 0;

#ifdef HAVE_MPI
    // Step 2: exchange coordinates
    // NOTE: we assume the coordinates are double, or double compatible
    // That means that for complex case, we assume that all imaginary parts are zeros
    int numProcs = comm->getSize();
    if (numProcs > 1) {
      RCP<const Teuchos::MpiComm<int> > dupMpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm->duplicate());

      MPI_Comm rawComm = (*dupMpiComm->getRawMpiComm())();

      int           sendCnt = gMap->size(), cnt = 0, recvSize;
      Array<int>    recvCnt(numProcs), Displs(numProcs);
      Array<double> sendBuf, recvBuf;

      sendBuf.resize(sendCnt);
      for (typename container::const_iterator cit = gMap->begin(); cit != gMap->end(); cit++)
        sendBuf[cnt++] = Teuchos::as<double>(STS::real(cit->first));

      MPI_Allgather(&sendCnt, 1, MPI_INT, recvCnt.getRawPtr(), 1, MPI_INT, rawComm);
      Displs[0] = 0;
      for (int i = 0; i < numProcs-1; i++)
        Displs[i+1] = Displs[i] + recvCnt[i];
      recvSize = Displs[numProcs-1] + recvCnt[numProcs-1];
      recvBuf.resize(recvSize);
      MPI_Allgatherv(sendBuf.getRawPtr(), sendCnt, MPI_DOUBLE, recvBuf.getRawPtr(), recvCnt.getRawPtr(), Displs.getRawPtr(), MPI_DOUBLE, rawComm);

      for (int i = 0; i < recvSize; i++)
        (*gMap)[as<SC>(recvBuf[i])] = 0;
    }
#endif

    GO cnt = 0;
    for (typename container::iterator it = gMap->begin(); it != gMap->end(); it++)
      it->second = cnt++;

    return gMap;
  }