void parallelMatrixTimesVector(int local_rows, int cols,
			       double *local_A, double *b, double *y, 
			       int root, int my_rank, int p, MPI_Comm comm)
{
  /*
    This function performs parallel matrix-vector multiplication of a
    matrix A times vector b.  The matrix is distributed by rows. Each
    process contains (local_rows)x(cols) matrix local_A stored as a
    one-dimensional array.  The vector b is stored on each process.
    Each process computes its result and then process root
    collects the results and returns them in y.

    local_rows is the number of rows on my_rank
    cols       is the number of columns on each process
    local_A    is a pointer to the matrix on my_rank
    b          is a pointer to the vector b of size cols
    y          is a pointer to the result on the root process. 
               y is significant only on root.
  */

  double *local_y = malloc(sizeof(double)*local_rows);
  /* Compute the local matrix times vector */
  compMatrixTimesVector(local_rows, cols, local_A, b, local_y);
  int sendcount = local_rows; /* number of doubles sent by process my_rank  */
  int *reccounts = NULL; /* reccounts[i] is the number of doubles received from process i */
  int *displs = NULL;    /* displs for the MPI_Gatherv function */

  if (my_rank != root)
    {
      /* Send the sendcounts to the root process. reccounts does not matter here. */
      MPI_Gather(&sendcount, 1, MPI_INT, reccounts, 1, MPI_INT, root, comm);
      /* Send the computed results to the root process. The receive
	 buffer, reccounts, and displs do not matter here. */
      MPI_Gatherv(local_y, sendcount, MPI_DOUBLE, 
		  y, reccounts, displs, MPI_DOUBLE, root, comm);
    }
  else /* we are on root process */
    {
      /* Gather the receive counts from each process */
      reccounts = malloc(sizeof(int)*p);
      MPI_Gather(&sendcount, 1, MPI_INT, reccounts, 1, MPI_INT, root, comm);

      /* Calculate displs for MPI_Gatherv */
      displs = malloc(sizeof(int)*p);
      int i;
      displs[0] = 0;
      for (i = 1; i < p; i++) 
	displs[i] = displs[i-1] + reccounts[i-1];
      
      /* Gather the results on the root process */
      MPI_Gatherv(local_y, sendcount, MPI_DOUBLE, 
		  y, reccounts, displs, MPI_DOUBLE, root, comm);
      
      free(displs);
      free(reccounts);
    }
  free(local_y);
}  
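The helper compMatrixTimesVector is not shown in this example. A minimal sketch of the local row-major matrix-vector product it is assumed to perform (the signature comes from the call site above; the body is an assumption, not the original code):

/* Hypothetical helper: computes y[i] = sum_j A[i*cols + j] * b[j] over the
   local rows of the row-major matrix A. Sketch only, inferred from the call
   in parallelMatrixTimesVector. */
void compMatrixTimesVector(int rows, int cols, double *A, double *b, double *y)
{
  int i, j;
  for (i = 0; i < rows; i++) {
    double sum = 0.0;
    for (j = 0; j < cols; j++)
      sum += A[i*cols + j] * b[j];
    y[i] = sum;
  }
}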
Example #2
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char **argv)
{
	if (MPI_Init(&argc, &argv) != MPI_SUCCESS) {
		fprintf(stderr, "MPI initialization failed.\n");
		return 1;
	}
	int rank, size;
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);
	if (size < 2) {
		fprintf(stderr, "can't play this game alone.\n");
		MPI_Finalize();
		return 1;
	}
	srand(rank + MPI_Wtime());
	int sendcount = rand()%10 + 1;
	char sendbuf[sendcount];
	for (int i = 0; i < sendcount; i++)
		sendbuf[i] = '0' + rank%10;
	fprintf(stderr, "[ %d ] sendcount: %d\n", rank, sendcount);
	int recvcounts[size];
	if (MPI_Allgather(&sendcount, 1, MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD)) {
		fprintf(stderr, "MPI_Allgather failed\n");
		MPI_Abort(MPI_COMM_WORLD, 1);
	}
	int totalcount = 0;
	for (int i = 0; i < size; i++)
		totalcount += recvcounts[i];
	fprintf(stderr, "[ %d ] totalcount: %d\n", rank, totalcount);
	char recvbuf[totalcount+1];
	memset(recvbuf, 0, sizeof(recvbuf));
	if (rank) {
		if (MPI_Gatherv(sendbuf, sendcount, MPI_CHAR, NULL, NULL, NULL, MPI_CHAR, 0, MPI_COMM_WORLD)) {
			fprintf(stderr, "MPI_Gatherv failed\n");
			MPI_Abort(MPI_COMM_WORLD, 1);
		}
	} else {
		int displs[size];
		displs[0] = 0;
		for (int i = 1; i < size; i++)
			displs[i] = displs[i - 1] + recvcounts[i - 1];
		if (MPI_Gatherv(sendbuf, sendcount, MPI_CHAR, recvbuf, recvcounts, displs, MPI_CHAR, 0, MPI_COMM_WORLD)) {
			fprintf(stderr, "MPI_Gatherv failed\n");
			MPI_Abort(MPI_COMM_WORLD, 1);
		}
		fprintf(stderr, "[ %d ] received Gatherv \"%s\"\n", rank, recvbuf);
	}
	if (MPI_Bcast(recvbuf, totalcount, MPI_CHAR, 0, MPI_COMM_WORLD)) {
		fprintf(stderr, "MPI_Bcast failed\n");
		MPI_Abort(MPI_COMM_WORLD, 1);
	}
	fprintf(stderr, "[ %d ] received Bcast \"%s\"\n", rank, recvbuf);
	MPI_Finalize();
	return 0;
}
Example #3
void collect_x_old(double **x,double *xold)
{
  int i,iunk,loc_inode,nunk_per_proc;
  int *index=NULL;
  double *unk_global, *unk_loc;

  /* allocate temporary arrays */

  nunk_per_proc = Nnodes_per_proc*Nunk_per_node;
  unk_loc = (double *) array_alloc (1, nunk_per_proc, sizeof(double));

  for (loc_inode=0; loc_inode < Nnodes_per_proc; loc_inode++)
     for (iunk=0; iunk<Nunk_per_node; iunk++){
        unk_loc[iunk+Nunk_per_node*loc_inode] = x[iunk][L2B_node[loc_inode]];  /* always use nodal ordering here */
     }

  if (Proc == 0) {
    unk_global = (double *) array_alloc (1, Nunknowns, sizeof(double));
    index = (int *) array_alloc (1, Nnodes, sizeof(int));
  }
  else {
    unk_global=NULL;
    index=NULL;
  }

  /* collect the node numbers from all the processors */

  MPI_Gatherv(L2G_node,Nnodes_per_proc,MPI_INT,
              index,Comm_node_proc,Comm_offset_node,
              MPI_INT,0,MPI_COMM_WORLD);

  /* collect the unknowns from all the processors */

  MPI_Gatherv(unk_loc,nunk_per_proc,MPI_DOUBLE,
              unk_global,Comm_unk_proc,Comm_offset_unk,
              MPI_DOUBLE,0,MPI_COMM_WORLD);
  safe_free((void *) &unk_loc);

  if (Proc == 0){
     for (i=0; i<Nnodes; i++){
        for (iunk=0; iunk<Nunk_per_node; iunk++){
           xold[index[i]*Nunk_per_node+iunk] = unk_global[i*Nunk_per_node+iunk];
        }
     }
     safe_free((void *) &unk_global);
     safe_free((void *) &index);
  }

  return;
}
Example #4
//-----------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------
void Image_Exchanger::exchange_fragment_images(unsigned int* databuf,
                                               int nviewer,
                                               ImageFragment_Tile* ift)
{
//    fprintf(stderr, "**** %s:%s() ****\n", __FILE__, __func__);
 
#ifdef _DEBUG7
    fprintf(stderr, "**** %s:%s() ****\n", __FILE__, __func__);
#endif


    unsigned int* sendbuf = databuf + m_sbuf_offset;
    unsigned int* recvbuf = databuf + m_rbuf_offset;


    if(nviewer == 1)
    {
        MPI_Gatherv((int*)sendbuf, m_scounts[0], MPI_INT,
                    (int*)recvbuf, m_rcounts, m_rdispls, MPI_INT,
                    0, MPI_COMM_WORLD);
    }
    else
    {
        MPI_Alltoallv( (int*)sendbuf, m_scounts, m_sdispls, MPI_INT,
                       (int*)recvbuf, m_rcounts, m_rdispls, MPI_INT, 
                       MPI_COMM_WORLD);
    }

    ift->address_fragments(m_rbuf_offset, m_rdispls);
}
Example #5
void mpi_gatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype,
		   char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs,
		   MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm,
		   MPI_Fint *ierr)
{
    MPI_Comm c_comm;
    MPI_Datatype c_sendtype, c_recvtype;
    int size;
    OMPI_ARRAY_NAME_DECL(recvcounts);
    OMPI_ARRAY_NAME_DECL(displs);

    c_comm = MPI_Comm_f2c(*comm);
    c_sendtype = MPI_Type_f2c(*sendtype);
    c_recvtype = MPI_Type_f2c(*recvtype);
    
    MPI_Comm_size(c_comm, &size);
    OMPI_ARRAY_FINT_2_INT(recvcounts, size);
    OMPI_ARRAY_FINT_2_INT(displs, size);

    sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf);
    sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf);
    recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf);

    *ierr = OMPI_INT_2_FINT(MPI_Gatherv(sendbuf, OMPI_FINT_2_INT(*sendcount),
					c_sendtype, recvbuf,
					OMPI_ARRAY_NAME_CONVERT(recvcounts),
					OMPI_ARRAY_NAME_CONVERT(displs),
					c_recvtype, 
					OMPI_FINT_2_INT(*root),
					c_comm));
}
Example #6
   void invoke() {

    if (!has_contiguous_data(lhs)) TRIQS_RUNTIME_ERROR << "mpi gather of array into a non contiguous view";

    auto c = laz.c;
    auto recvcounts = std::vector<int>(c.size());
    auto displs = std::vector<int>(c.size() + 1, 0);
    int sendcount = laz.ref.domain().number_of_elements();
    auto D = mpi::mpi_datatype<typename A::value_type>();

    auto d = laz.domain();
    if (laz.all || (laz.c.rank() == laz.root)) resize_or_check_if_view(lhs, d.lengths());

    void *lhs_p = lhs.data_start();
    const void *rhs_p = laz.ref.data_start();

    auto mpi_ty = mpi::mpi_datatype<int>();
    if (!laz.all)
     MPI_Gather(&sendcount, 1, mpi_ty, &recvcounts[0], 1, mpi_ty, laz.root, c.get());
    else
     MPI_Allgather(&sendcount, 1, mpi_ty, &recvcounts[0], 1, mpi_ty, c.get());

    for (int r = 0; r < c.size(); ++r) displs[r + 1] = recvcounts[r] + displs[r];

    if (!laz.all)
     MPI_Gatherv((void *)rhs_p, sendcount, D, lhs_p, &recvcounts[0], &displs[0], D, laz.root, c.get());
    else
     MPI_Allgatherv((void *)rhs_p, sendcount, D, lhs_p, &recvcounts[0], &displs[0], D, c.get());
   }
Example #7
void gatherVector(float *localProdVec, int *rowInfo, float *prodVec) {
  int myRank, numProcs;
  int i;

  int *displs, *recvCount;
  int myRowCount;

  MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myRank);

  displs = (int *) malloc(sizeof(int) * numProcs);
  recvCount = (int *) malloc(sizeof(int) * numProcs);

  if (myRank == ROOT) {
    for (i = 0; i < numProcs; i++) {
      //get offset and count of rows to be received from proc i
      displs[i] = rowInfo[i];
      recvCount[i] = rowInfo[i+numProcs] - rowInfo[i] + 1;
    }
  }

  myRowCount = rowInfo[myRank+numProcs] - rowInfo[myRank] + 1;

  //gather this computed vector at root
  MPI_Gatherv(localProdVec, myRowCount, MPI_FLOAT,
	      prodVec, recvCount, displs,
	      MPI_FLOAT, ROOT, MPI_COMM_WORLD);

  free(displs);
  free(recvCount);
}
// computes A*mat and stores result on the rank 0 process in matProd (assumes the memory has already been allocated)
void distributedMatMatProd(const double *localRowChunk, const double *mat,
    double *matProd, const distMatrixInfo *matInfo, const distGatherInfo
    *eigInfo, scratchMatrices * scratchSpace) {
    multiplyAChunk(localRowChunk, mat, scratchSpace->Scratch3,
        matInfo->localrows, matInfo->numcols, eigInfo->numeigs);
    if (matInfo->mpi_rank != 0) {
        MPI_Gatherv(scratchSpace->Scratch3,
            matInfo->localrows*eigInfo->numeigs, MPI_DOUBLE, NULL, NULL, NULL,
            MPI_DOUBLE, 0, *(matInfo->comm));
    } else {
        MPI_Gatherv(scratchSpace->Scratch3,
            matInfo->localrows*eigInfo->numeigs, MPI_DOUBLE, matProd,
            eigInfo->elementcounts, eigInfo->elementoffsets, MPI_DOUBLE, 0,
            *(matInfo->comm));
    }
}
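The helper multiplyAChunk is not shown here. Judging from the call above and from the cblas usage later in this collection, it computes the row-major product of the local row chunk with mat; a sketch under that assumption (not the original implementation):

/* Hypothetical helper, inferred from the call site: Out = A * B, where A is
   rows x inner, B is inner x cols, and Out is rows x cols, all row-major. */
#include <cblas.h>

void multiplyAChunk(const double *A, const double *B, double *Out,
                    int rows, int inner, int cols)
{
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                rows, cols, inner,
                1.0, A, inner,
                B, cols,
                0.0, Out, cols);
}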
Example #9
double time_gatherv(struct collParams* p)
{
    int i, size2;
    int disp = 0;
    for ( i = 0; i < p->nranks; i++) {
        size2 = i % (p->size+1);
        recvcounts[i] = size2;
        rdispls[i] = disp;
        disp += size2;
    }
    MPI_Barrier(MPI_COMM_WORLD);

    size2 = p->myrank % (p->size+1);
    __TIME_START__;
    for (i = 0; i < p->iter; i++) {
        MPI_Gatherv(sbuffer, size2, p->type, rbuffer, recvcounts, rdispls, p->type, p->root, p->comm);
        __BAR__(p->comm);
    }
    __TIME_END__;

    if (check_buffers) {
        check_sbuffer(p->myrank);
        if (p->myrank == p->root) {
            for (i = 0; i < p->nranks; i++) {
                check_rbuffer(rbuffer, rdispls[i], i, 0, recvcounts[i]);
            }
        }
    }

    return __TIME_USECS__ / (double)p->iter;
}
Example #10
void Coll_ids_at_Master( reax_system *system, storage *workspace, 
			 mpi_datatypes *mpi_data )
{
  int i;
  int *id_list;

  MPI_Gather( &system->n, 1, MPI_INT, workspace->rcounts, 1, MPI_INT, 
	      MASTER_NODE, mpi_data->world ); 

  if( system->my_rank == MASTER_NODE ){
    workspace->displs[0] = 0;
    for( i = 1; i < system->wsize; ++i )
      workspace->displs[i] = workspace->displs[i-1] + workspace->rcounts[i-1];
  }

  id_list = (int*) malloc( system->n * sizeof(int) );
  for( i = 0; i < system->n; ++i )
    id_list[i] = system->my_atoms[i].orig_id;
  
  MPI_Gatherv( id_list, system->n, MPI_INT, 
	       workspace->id_all, workspace->rcounts, workspace->displs, 
	       MPI_INT, MASTER_NODE, mpi_data->world );

  free( id_list );

#if defined(DEBUG)
  if( system->my_rank == MASTER_NODE ) {
    for( i = 0 ; i < system->bigN; ++i )
      fprintf( stderr, "id_all[%d]: %d\n", i, workspace->id_all[i] );
  }
#endif
}
Example #11
ubjson::Value Master::ExportSimulation() {
	// This method is a control method, so sends orders from master 0 to other
	// masters
	if (id_ == 0) {
		order_ = Order::EXPORT_SIMULATION;
		MPI_Bcast(&order_, 1, MPI_INT, 0, MasterComm_);
	}

	ubjson::Value local_agents;
	std::vector<ubjson::Value> local_agents_by_types(nb_types_);
	for (AgentHandler &agent_handler : agent_handlers_) {
		agent_handler.GetJsonNodes(local_agents_by_types);
	}
	for (auto &type : agent_type_to_string_) {
		local_agents[type.second] = std::move(local_agents_by_types.at(type.first));
	}

	// Now all the infos must be gathered in master 0
	std::ostringstream local_data_stream;
	ubjson::StreamWriter<std::ostringstream> writer(local_data_stream);
	writer.writeValue(local_agents);
	std::string local_data = local_data_stream.str();
	int local_data_size = local_data.size();
	// First master 0 must know how much data it will receive
	std::vector<int> sizes_to_receive;
	if (id_ == 0) {
		sizes_to_receive.resize(nb_masters_);
	}
	MPI_Gather(&local_data_size, 1, MPI_INT, sizes_to_receive.data(), 1, MPI_INT, 0, MasterComm_);
	// Gather all serialized agent data into one contiguous buffer on master 0,
	// then split it into the per-master strings stored in 'results'
	std::vector<int> displs;
	std::vector<char> gathered;
	if (id_ == 0) {
		int offset = 0;
		for (int i=0; i<nb_masters_; i++) {
			displs.push_back(offset);
			offset += sizes_to_receive.at(i);
		}
		gathered.resize(offset);
	}
	MPI_Gatherv((void*)local_data.data(), local_data_size, MPI_UNSIGNED_CHAR,
		(void*)gathered.data(), sizes_to_receive.data(), displs.data(), MPI_UNSIGNED_CHAR, 0, MasterComm_);
	// Storing the results in 'results'
	std::vector<std::string> results;
	if (id_ == 0) {
		for (int i=0; i<nb_masters_; i++) {
			results.emplace_back(gathered.data() + displs.at(i), sizes_to_receive.at(i));
		}
	}

	// Grouping the results
	ubjson::Value agents;
	for (auto &master_agents : results) {
		ubjson::Value masters_value;
		std::istringstream s(master_agents);
		ubjson::StreamReader<std::istringstream> reader(s);
		masters_value = reader.getNextValue();
		for (auto &type : agent_type_to_string_) {
			for (auto &agent : masters_value[type.second]) {
				agents[type.second].push_back(agent);
			}
		}
	}
	ubjson::Value final;
	final["agents"] = agents;
Example #12
void Coll_rvecs_at_Master( reax_system *system, storage *workspace, 
			   mpi_datatypes *mpi_data, rvec* v )
{
  MPI_Gatherv( v, system->n, mpi_data->mpi_rvec, 
	       workspace->f_all, workspace->rcounts, workspace->displs, 
	       mpi_data->mpi_rvec, MASTER_NODE, mpi_data->world );
}
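The derived type mpi_data->mpi_rvec is created elsewhere in this code base. Assuming rvec is a triple of doubles (as is common in ReaxFF-style codes), it could be set up roughly as follows; this is a sketch, not the project's actual initialization code:

/* Sketch only: assumes rvec is double[3]; the real setup may differ. */
MPI_Datatype mpi_rvec;
MPI_Type_contiguous(3, MPI_DOUBLE, &mpi_rvec);
MPI_Type_commit(&mpi_rvec);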
Example #13
    static void gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out, int root)
    {
      std::vector<int>  counts(comm.size());
      Collectives<int,void*>::gather(comm, (int) in.size(), counts, root);

      std::vector<int>  offsets(comm.size(), 0);
      for (unsigned i = 1; i < offsets.size(); ++i)
        offsets[i] = offsets[i-1] + counts[i-1];

      std::vector<T> buffer(offsets.back() + counts.back());
      MPI_Gatherv(Datatype::address(const_cast<T&>(in[0])),
                  in.size(),
                  Datatype::datatype(),
                  Datatype::address(buffer[0]),
                  &counts[0],
                  &offsets[0],
                  Datatype::datatype(),
                  root, comm);

      out.resize(comm.size());
      size_t cur = 0;
      for (unsigned i = 0; i < (unsigned)comm.size(); ++i)
      {
          out[i].reserve(counts[i]);
          for (unsigned j = 0; j < (unsigned)counts[i]; ++j)
              out[i].push_back(buffer[cur++]);
      }
    }
Example #14
void mpi_gatherv (void *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype,
		  void *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs,
		  MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *__ierr)
{
  *__ierr = MPI_Gatherv (sendbuf, *sendcount, MPI_Type_f2c (*sendtype),
			 recvbuf, recvcounts, displs, 
			 MPI_Type_f2c (*recvtype), *root,MPI_Comm_f2c (*comm));
}
Example #15
std::vector<int> gather_vectors(std::vector<int>& local_vec, MPI_Comm comm)
{
    // get MPI parameters
    int rank;
    int p;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &p);

    // get local size
    int local_size = local_vec.size();

    // init result
    std::vector<int> result;

    // master process: receive results
    if (rank == 0)
    {
        // gather local array sizes, sizes are restricted to `int` by MPI anyway
        // therefore use int
        std::vector<int> local_sizes(p);
        MPI_Gather(&local_size, 1, MPI_INT, &local_sizes[0], 1, MPI_INT, 0, comm);

        // gather-v to collect all the elements
        int total_size = std::accumulate(local_sizes.begin(), local_sizes.end(), 0);
        result.resize(total_size);

        // get receive displacements
        std::vector<int> displs(p, 0);
        for (int i = 1; i < p; ++i)
            displs[i] = displs[i-1] + local_sizes[i-1];

        // gather v the vector data to the root
        MPI_Gatherv(&local_vec[0], local_size, MPI_INT,
                    &result[0], &local_sizes[0], &displs[0], MPI_INT, 0, comm);
    }
    // else: send results
    else {
        // gather local array sizes
        MPI_Gather(&local_size, 1, MPI_INT, NULL, 1, MPI_INT, 0, comm);

        // sent the actual data
        MPI_Gatherv(&local_vec[0], local_size, MPI_INT,
                    NULL, NULL, NULL, MPI_INT, 0, comm);
    }
    return result;
}
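A minimal driver for gather_vectors might look like the following; the contribution sizes are purely illustrative:

// Illustrative usage of gather_vectors(): rank i contributes i+1 copies of i.
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    std::vector<int> local(rank + 1, rank);
    std::vector<int> all = gather_vectors(local, MPI_COMM_WORLD);
    if (rank == 0)
        std::printf("gathered %zu elements on the root\n", all.size());
    MPI_Finalize();
    return 0;
}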
Example #16
FC_FUNC( mpi_gatherv , MPI_GATHERV )
                        ( void *sendbuf, int *sendcount, int *sendtype,
			  void *recvbuf, int *recvcounts, int *displs,
			  int *recvtype, int *root, int *comm, int *ierror)
{
  *ierror=MPI_Gatherv( mpi_c_in_place(sendbuf), *sendcount, *sendtype,
		       recvbuf, recvcounts, displs,
		       *recvtype, *root, *comm);
}
// ****************************************************************************
//  Method: avtImgCommunicator::
//
//  Purpose: 
//    Send the metadata needed by the root node to make decisions
//
//  Arguments:
//    arraySize   : the number of elements being sent
//    allIotaMetadata : the metadata being sent
//
//  Programmer: Pascal Grosset
//  Creation: July 2013
//
//  Modifications:
//
// ****************************************************************************
void avtImgCommunicator::gatherIotaMetaData(int arraySize, float *allIotaMetadata){

  #ifdef PARALLEL
    int *recvSizePerProc = NULL;
    float *tempRecvBuffer = NULL;
    int *offsetBuffer = NULL;

    if (my_id == 0){
      tempRecvBuffer = new float[totalPatches*7]; // x7: procId, patchNumber, dims[0], dims[1], screen_ll[0], screen_ll[1], avg_z
      recvSizePerProc = new int[num_procs]; 
      offsetBuffer = new int[num_procs];  
      for (int i=0; i<num_procs; i++){
        if (i == 0)
          offsetBuffer[i] = 0;
        else
          offsetBuffer[i] = offsetBuffer[i-1] + recvSizePerProc[i-1];

        recvSizePerProc[i] = processorPatchesCount[i]*7;
      }
    }

    MPI_Gatherv(allIotaMetadata, arraySize, MPI_FLOAT,   tempRecvBuffer, recvSizePerProc, offsetBuffer,MPI_FLOAT,    0, MPI_COMM_WORLD);// all send to proc 0

    if (my_id == 0){
      allRecvIotaMeta = new iotaMeta[totalPatches]; // allocate space to receive the many patches

      iotaMeta tempPatch;
      for (int i=0; i<totalPatches; i++){
        tempPatch.procId =    (int) tempRecvBuffer[i*7 + 0];
        tempPatch.patchNumber = (int) tempRecvBuffer[i*7 + 1];
        tempPatch.dims[0] =   (int) tempRecvBuffer[i*7 + 2];
        tempPatch.dims[1] =   (int) tempRecvBuffer[i*7 + 3];
        tempPatch.screen_ll[0] =(int) tempRecvBuffer[i*7 + 4];
        tempPatch.screen_ll[1] =(int) tempRecvBuffer[i*7 + 5];
        tempPatch.avg_z =         tempRecvBuffer[i*7 + 6];

        int patchIndex = getDataPatchID(tempPatch.procId, tempPatch.patchNumber);
        allRecvIotaMeta[patchIndex] = setIota(tempPatch.procId, tempPatch.patchNumber, tempPatch.dims[0], tempPatch.dims[1], tempPatch.screen_ll[0], tempPatch.screen_ll[1], tempPatch.avg_z);
        all_avgZ_proc0.insert(tempPatch.avg_z); //insert avg_zs into the set to keep a count of the total number of avg_zs
      }

      if (recvSizePerProc != NULL)
        delete []recvSizePerProc;
      recvSizePerProc = NULL;

      if (offsetBuffer != NULL)
        delete []offsetBuffer;
      offsetBuffer = NULL;

      if (tempRecvBuffer != NULL)
        delete []tempRecvBuffer;
      tempRecvBuffer = NULL;

    }
  #endif  
}
Example #18
/*
 * Class:     mpi_Intracomm
 * Method:    Gatherv
 * Signature:
 (Ljava/lang/Object;IILmpi/Datatype;Ljava/lang/Object;I[I[ILmpi/Datatype;I)V
*/
JNIEXPORT void JNICALL Java_mpi_Intracomm_gatherv(JNIEnv *env, jobject jthis,
                                                  jobject sendbuf, jint sendoffset,
                                                  jint sendcount, jobject sendtype,
                                                  jobject recvbuf, jint recvoffset,
                                                  jintArray recvcounts, jintArray displs,
                                                  jobject recvtype, jint root)
{
    int id ;
    jint *rcount = NULL, *dps = NULL;
    jboolean isCopy ;

    MPI_Comm mpi_comm =
        (MPI_Comm) ((*env)->GetLongField(env,jthis,ompi_java.CommhandleID)) ;

    MPI_Datatype mpi_stype = (MPI_Datatype)
        ((*env)->GetLongField(env,sendtype,ompi_java.DatatypehandleID)) ;
    MPI_Datatype mpi_rtype = mpi_stype;

    int sbaseType = (*env)->GetIntField(env, sendtype, ompi_java.DatatypebaseTypeID) ;
    int rbaseType = 0;

    void *sendptr, *recvptr = NULL;
    void *sbufbase, *rbufbase ;

    ompi_java_clearFreeList(env) ;

    MPI_Comm_rank(mpi_comm, &id) ;
    if(id == root) {
        rcount=(*env)->GetIntArrayElements(env,recvcounts,&isCopy);
        dps=(*env)->GetIntArrayElements(env,displs,&isCopy);

        mpi_rtype = (MPI_Datatype)
            ((*env)->GetLongField(env,recvtype,ompi_java.DatatypehandleID)) ;

        rbaseType = (*env)->GetIntField(env, recvtype, ompi_java.DatatypebaseTypeID) ;

        recvptr = ompi_java_getBufPtr(&rbufbase,
                                         env, recvbuf, rbaseType, recvoffset) ;
    }

    sendptr = ompi_java_getBufPtr(&sbufbase, env, sendbuf, sbaseType, sendoffset) ;

    MPI_Gatherv(sendptr, sendcount, mpi_stype,
                recvptr, (int*) rcount, (int*) dps, mpi_rtype,
                root, mpi_comm) ;

    ompi_java_releaseBufPtr(env, sendbuf, sbufbase, sbaseType) ;
    if (id == root) {
        ompi_java_releaseBufPtr(env, recvbuf, rbufbase, rbaseType);
    }

    if (id == root) {
        (*env)->ReleaseIntArrayElements(env,recvcounts,rcount,JNI_ABORT);
        (*env)->ReleaseIntArrayElements(env,displs,dps,JNI_ABORT);
    }
}
void dd_gatherv(gmx_domdec_t *dd,
                int scount,void *sbuf,
                int *rcounts,int *disps,void *rbuf)
{
#ifdef GMX_MPI
    MPI_Gatherv(sbuf,scount,MPI_BYTE,
                rbuf,rcounts,disps,MPI_BYTE,
                DDMASTERRANK(dd),dd->mpi_comm_all);
#endif
}
Example #20
    static void gather(const communicator& comm, const std::vector<T>& in, int root)
    {
      Collectives<int,void*>::gather(comm, (int) in.size(), root);

      MPI_Gatherv(Datatype::address(const_cast<T&>(in[0])),
                  in.size(),
                  Datatype::datatype(),
                  0, 0, 0,
                  Datatype::datatype(),
                  root, comm);
    }
Example #21
void reduce(int words_amount, char* words, int* indexes, int* list) {
	int nbytes;
	int i, j;
	int* count = NULL;
	int* skip = NULL;
	int* count_bytes = NULL;
	int* skip_bytes = NULL;

	if (world_rank == master) {
		count = calloc(world_size, sizeof(int));
		skip = calloc(world_size, sizeof(int));
		count_bytes = calloc(world_size, sizeof(int));
		skip_bytes = calloc(world_size, sizeof(int));

		divide(world_size, words_amount, words, count, skip);
		bytes(world_size, indexes, count, skip, count_bytes, skip_bytes);

		for (i=words_amount-1, j=world_size-1; i>=0; i--) {
			if (i<skip[j])
				j--;
			indexes[i] -= indexes[skip[j]-1];
		}
		for (i=0; i<world_size; i++)
			debug("R count[%d] = %d, count_bytes[%d] = %d, skip[%d] = %d, skip_bytes[%d] = %d\n", i, count[i], i, count_bytes[i], i, skip[i], i, skip_bytes[i]);
		for (i=0; i<words_amount; i++)
			debug("R indexes[%d] = %d\n", i, indexes[i]);
	}

	/* distribute the number of words assigned to each process */
	/* stored in the count array; each process receives it into words_amount */
	MPI_Scatter(count, 1, MPI_INT, &words_amount, 1, MPI_INT, master, MPI_COMM_WORLD);
	debug("[%d] Got %d words to reduce\n", world_rank, words_amount);
	if (world_rank != master) /* if this process is not the master, allocate indexes for it */
		indexes = calloc(words_amount, sizeof(int));

	MPI_Scatter(count_bytes, 1, MPI_INT, &nbytes, 1, MPI_INT, master, MPI_COMM_WORLD);
	if (world_rank != master)
		list = calloc(nbytes, sizeof(int));
	debug("[%d] Got %d bytes of list to reduce\n", world_rank, nbytes);

	MPI_Scatterv(list, count_bytes, skip_bytes, MPI_INT, list, nbytes, MPI_INT, master, MPI_COMM_WORLD);
	MPI_Scatterv(indexes, count, skip, MPI_INT, indexes, words_amount, MPI_INT, master, MPI_COMM_WORLD);
	for (i=0; i<words_amount; i++)
		debug("[%d] Index %d\n", world_rank, indexes[i]);

	for (i=0, j=0; i<words_amount; i++) {
		int sum = 0;
		for (; j<indexes[i]; j++)
			sum += list[j];
		indexes[i] = sum;
		debug("[%d] SUM %d = %d\n", world_rank, i, indexes[i]);
	}
	MPI_Gatherv(indexes, words_amount, MPI_INT, indexes, count, skip, MPI_INT, master, MPI_COMM_WORLD);
}
Example #22
// Gather the results of the computation at the root
void gather_pres() {
    MPI_Gatherv(local_pres0 + local_width + 3,      // sendbuf
                1,                                  // sendcount
                local_pres_t,                       // sendtype
                pres + imageSize + 3,               // recvbuf
                counts,                             // recvcounts
                displs,                             // displs
                pres_and_diverg_t,                  // recvtype
                ROOT,                               // root
                cart_comm);                         // comm
}
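gather_pres depends on globals set up elsewhere (local_pres0, pres, counts, displs, cart_comm, ROOT, imageSize, local_width and the two derived datatypes). As a rough illustration of how a strided sender type like local_pres_t could be described, assuming a row-major local grid whose padded rows hold local_width + 3 doubles and whose interior has local_height rows (local_height and the element type are assumptions, not the original setup):

/* Sketch only: one MPI_Gatherv element covering the interior of a padded,
   row-major local grid. local_height and MPI_DOUBLE are assumed. */
MPI_Datatype local_pres_t;
MPI_Type_vector(local_height,      /* interior rows (assumed)      */
                local_width,       /* doubles per interior row     */
                local_width + 3,   /* stride = padded row length   */
                MPI_DOUBLE, &local_pres_t);
MPI_Type_commit(&local_pres_t);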
Example #23
void parallelMatrixTimesVector(int num_local_rows, int cols,
			       double *local_A, double *b, double *y, 
			       int root, int my_rank, int p, MPI_Comm comm)
{
  /*
    This function performs parallel matrix-vector multiplication of a
    matrix A times vector b.  The matrix is distributed by rows. Each
    process contains (num_local_rows)x(cols) matrix local_A stored as a
    one-dimensional array.  The vector b is stored on each process.
    Each process computes its result and then process root
    collects the results and returns them in y.

    num_local_rows is the number of rows on my_rank
    cols       is the number of columns on each process
    local_A    is a pointer to the matrix on my_rank
    b          is a pointer to the vector b of size cols
    y          is a pointer to the result on the root process. 
               y is significant only on root.
  */

  /* Allocate memory for the local result on my_rank */
  double *local_y = malloc(sizeof(double)*num_local_rows);
  
  /* Compute the local matrix times vector */
  compMatrixTimesVector(num_local_rows, cols, local_A, b, local_y);
  
  int *reccounts = NULL; /* reccounts[i] is the number of doubles to be received from process i */
  int *displs = NULL;    /* displs for the MPI_Gatherv function */
  
  if (my_rank==root)
    reccounts = malloc(sizeof(int)*p);  
  /* Gather num_local_rows from each process */
  MPI_Gather(&num_local_rows, 1, MPI_INT, reccounts, 1, MPI_INT, root, comm);
  
  if (my_rank==root)
    {
      /* Calculate displs for MPI_Gatherv */
      int i;
      displs = malloc(sizeof(int)*p);
      displs[0] = 0;
      for (i = 1; i < p; i++) 
	displs[i] = displs[i-1] + reccounts[i-1];
    }
  
  MPI_Gatherv(local_y, num_local_rows, MPI_DOUBLE, 
	      y, reccounts, displs, MPI_DOUBLE, root, comm);
  if (my_rank==root)
    {
      free(displs); 
      free(reccounts);
    }
  free(local_y);
}  
int PLA_MPI_Gatherv(
	void *		sendbuf, 
	int 		scount, 
	MPI_Datatype 	stype, 
	void *		recvbuf,
	int *		rcounts,
	int *		displs,
	MPI_Datatype 	rtype, 
	int 		root, 
	MPI_Comm	comm)
{
    return (MPI_Gatherv ( sendbuf, scount, stype, recvbuf, rcounts, displs, rtype, root, comm ));
}
void LBNSCommunicator::gatherArray(
		int& count,
		std::vector<int>& data_size,
		std::vector<int>& data_displ,
		std::vector<int>& data){
	int rank=0;
	int com_size=0;
	MPI_Comm_rank(MPI_COMM_WORLD,&rank);

	MPI_Comm_size(MPI_COMM_WORLD,&com_size);
	if(data_size.size()==0){

		count=data.size();
		data_size.resize(1);
		if(rank==_maxSizeCommunicators[_index*2+1])
			data_size.resize(com_size);
		MPI_Gather(&count,1, MPI_INT, &data_size[0], 1, MPI_INT,_maxSizeCommunicators[_index*2+1], MPI_COMM_WORLD);
		if(rank==_maxSizeCommunicators[_index*2+1]){
			data_displ.resize(com_size);

			data_displ[rank]=0;

			for(int i=0;i<com_size;i++){
				if(i!=rank){
					data_displ[i]=count;
					count+=data_size[i];
				}
			}
		}
	}
	data.resize(count);
	if(rank==_maxSizeCommunicators[_index*2+1])
		MPI_Gatherv(MPI_IN_PLACE,0, MPI_INT,&data[0], &data_size[0],&data_displ[0], MPI_INT,_maxSizeCommunicators[_index*2+1], MPI_COMM_WORLD);
	else
		MPI_Gatherv(&data[0],count, MPI_INT,&data[0], &data_size[0],&data_displ[0], MPI_INT,_maxSizeCommunicators[_index*2+1], MPI_COMM_WORLD);



}
void dump_time_field(char* file_prefix, grid_parms grid, double field)
{
	MPI_Status status;
	MPI_Offset displacement = 0;
	MPI_File fw;
	char* buffer = (char*)checked_malloc(NUM_DBL_TO_CHAR_BYTES * sizeof(char), SRC_LOC);
	char* write_buffer;
	int root = 0;
	char filename[50];

	int length = sprintf(buffer, "%2.12lf\n", field);

	int *recv_count = (int*) checked_malloc(grid.num_ranks_branch * sizeof(int), SRC_LOC);
	int *disp = (int*) checked_malloc(grid.num_ranks_branch * sizeof(int), SRC_LOC);

	/// Gathering the lengths of buffer from each MPI process.
	CHECK_MPI_ERROR(MPI_Gather(&length, 1, MPI_INT, recv_count, 1, MPI_INT, root, grid.cart_comm));

	int total_buffer_length = 0;
	if (grid.rank_branch == 0)
	{
		// recv_count is only filled on the root of the gather.
		for (int i = 0; i < grid.num_ranks_branch; i++)
		{
			disp[i] = total_buffer_length;
			total_buffer_length += recv_count[i];
		}
	}

	if (grid.rank_branch == 0)
	{
		write_buffer = (char*) checked_malloc(total_buffer_length * sizeof(char), SRC_LOC);
	}

	// Gathering the buffers from all MPI processes.
	CHECK_MPI_ERROR(MPI_Gatherv(buffer, length, MPI_CHAR, write_buffer, recv_count, disp, MPI_CHAR, root, grid.cart_comm));

	if (grid.rank_branch == 0)
	{
		sprintf(filename, "%s/%s_%d_%d.txt", grid.time_profiling_dir, file_prefix, grid.domain_index, grid.branch_tag);

		CHECK_MPI_ERROR(MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fw));
		CHECK_MPI_ERROR(MPI_File_write_at(fw, 0, write_buffer, total_buffer_length, MPI_CHAR, &status));
		MPI_File_close(&fw);
	}

	if (grid.rank_branch == 0)
	{
		free(write_buffer);
	}

	free(recv_count);
	free(buffer);
	free(disp);
}
Example #27
PetscErrorCode writeProfileSurfaceScalarData(char *fileName, PetscScalar *arr, PetscInt numValsPerProfile, PetscTruth appendToFile)
{
  PetscErrorCode ierr;
  PetscScalar *tmpArr;
  PetscInt *displs, *rcounts, cumpro;
  PetscInt ipro;
  size_t m1, m2;
/*   off_t  off, offset;   */
  PetscViewer fd;
  PetscInt fp;
/*   PetscInt iShift; */
  PetscMPIInt numProcessors, myId;

  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&myId);CHKERRQ(ierr);  
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&numProcessors);CHKERRQ(ierr);

  m1 = numValsPerProfile*totalNumProfiles*sizeof(PetscScalar);
  m2 = numProcessors*sizeof(PetscInt);  
/*   Allocate memory for temporary arrays */
  ierr = PetscMalloc(m1,&tmpArr);CHKERRQ(ierr);
  ierr = PetscMalloc(m2,&displs);CHKERRQ(ierr);
  ierr = PetscMalloc(m2,&rcounts);CHKERRQ(ierr);

  cumpro=0;
  for (ipro=1; ipro<=numProcessors; ipro++) {
    displs[ipro-1]=numValsPerProfile*cumpro;
    rcounts[ipro-1]=numValsPerProfile*gNumProfiles[ipro-1];
    cumpro = cumpro + gNumProfiles[ipro-1];
/*     ierr=PetscPrintf(PETSC_COMM_WORLD,"cumpro=%d, displs=%d\n",cumpro,displs[ipro-1],rcounts[ipro-1]);CHKERRQ(ierr);         */
  }
  
  MPI_Gatherv(arr,numValsPerProfile*lNumProfiles,MPI_DOUBLE,tmpArr,rcounts,displs,MPI_DOUBLE,0, PETSC_COMM_WORLD); 

  if (myId==0) { /* this shouldn't really be necessary, but without it, all processors seem to be writing in append mode */
	if (appendToFile) {
	  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,fileName,FILE_MODE_APPEND,&fd);CHKERRQ(ierr);
	} else {
	  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,fileName,FILE_MODE_WRITE,&fd);CHKERRQ(ierr);
	}  
  
	ierr = PetscViewerBinaryGetDescriptor(fd,&fp);CHKERRQ(ierr);
	ierr = PetscBinaryWrite(fp,tmpArr,numValsPerProfile*totalNumProfiles,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
	ierr = PetscViewerDestroy(fd);CHKERRQ(ierr);
  }
  
  ierr = PetscFree(tmpArr);CHKERRQ(ierr);
  ierr = PetscFree(displs);CHKERRQ(ierr);
  ierr = PetscFree(rcounts);CHKERRQ(ierr);
    
  return 0;
}
Example #28
void generateSolutions()
{
	int i,j,k;
	sizeSol=0;
	solutionCounter = 0;
	MPI_Allgather(&my_frontSize,1,MPI_INT,
		frontSize_all,1,MPI_INT,
		MPI_COMM_WORLD);
	int count=0;
	for(i=0;i<NP;i++)
	{
		if(x_rank[i]==1)
		{
			memcpy(&collection_nonDom_x[count*D],
				&x_variable[i*D],D*sizeof(double));
			memcpy(&collection_nonDom_fit[count*Nobj],
				&x_fitness[i*Nobj],Nobj*sizeof(double));
			count++;
		}
	}

	update_recv_disp(frontSize_all,D);
	MPI_Gatherv(collection_nonDom_x,my_frontSize*D,MPI_DOUBLE,
		finalSolutions,recv_size,disp_size,MPI_DOUBLE,
		0,MPI_COMM_WORLD);
	update_recv_disp(frontSize_all,Nobj);
	MPI_Gatherv(collection_nonDom_fit,my_frontSize*Nobj,MPI_DOUBLE,
		finalFitness,recv_size,disp_size,MPI_DOUBLE,
		0,MPI_COMM_WORLD);

	for(i=0;i<numSpecies;i++)
	{
		sizeSol += frontSize_all[i];
	}

	if(mpi_rank==0)
		nonDominatedSorting(finalFitness,finalRank,sizeSol,sizeSol);
}
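The helper update_recv_disp is not shown; from the calls above it evidently fills the global recv_size and disp_size arrays from the per-rank front sizes, scaled by the number of values per solution. A sketch under that assumption (only the names come from the code above; the body is guessed):

/* Hypothetical helper, inferred from the call sites: build receive counts and
   displacements for MPI_Gatherv from the gathered front sizes. */
void update_recv_disp(int *frontSize_all, int width)
{
	int i;
	for (i = 0; i < numSpecies; i++)
		recv_size[i] = frontSize_all[i] * width;
	disp_size[0] = 0;
	for (i = 1; i < numSpecies; i++)
		disp_size[i] = disp_size[i-1] + recv_size[i-1];
}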
// computes means of the rows of A, subtracts them from A, and returns them in meanVec on the root process
// assumes memory has already been allocated for meanVec
void computeAndSubtractRowMeans(double *localRowChunk, double *meanVec, distMatrixInfo *matInfo) {
    int mpi_rank = matInfo->mpi_rank;
    int numcols = matInfo->numcols;
    int localrows = matInfo->localrows;
    int * rowcounts = matInfo->rowcounts;
    int * rowoffsets = matInfo->rowoffsets;
    MPI_Comm *comm = matInfo->comm;

    double *onesVec = (double *) malloc( numcols * sizeof(double));
    double *localMeanVec = (double *) malloc( localrows * sizeof(double));

    for(int idx = 0; idx < numcols; idx = idx + 1) {
        onesVec[idx]=1;
    }
    cblas_dgemv(CblasRowMajor, CblasNoTrans, localrows, numcols, 1.0/((double)numcols), localRowChunk, numcols, onesVec, 1, 0, localMeanVec, 1);
    cblas_dger(CblasRowMajor, localrows, numcols, -1.0, localMeanVec, 1, onesVec, 1, localRowChunk, numcols);
    if (mpi_rank != 0) {
        MPI_Gatherv(localMeanVec, localrows, MPI_DOUBLE, NULL, NULL, NULL, MPI_DOUBLE, 0, *comm);
    } else {
        MPI_Gatherv(localMeanVec, localrows, MPI_DOUBLE, meanVec, rowcounts, rowoffsets, MPI_DOUBLE, 0, *comm);
    }
    free(onesVec);
    free(localMeanVec);
}
int 	MPI_Gatherv_Wrapper(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcount, int *recvdisp, MPI_Datatype recvtype, int root, MPI_Comm comm)
{
#ifdef COMMPI
  char *me = ft_mpi_routine_names[MPI_Gatherv_cntr];
  int ierr;
  FT_INITIALIZE(me, ft_global_ht)
  ft_mpi_cntrs[MPI_Total_cntr]++;
  ft_mpi_cntrs[MPI_Gatherv_cntr]++;                                                                
  ierr = MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvdisp, recvtype, root, comm);
  FT_FINALIZE(me, ft_global_ht, 1)
  return(ierr);
#else
  return(0);
#endif
}