void parallelMatrixTimesVector(int local_rows, int cols, double *local_A, double *b,
                               double *y, int root, int my_rank, int p, MPI_Comm comm)
{
    /* This function performs parallel matrix-vector multiplication of a matrix A
       times vector b. The matrix is distributed by rows. Each process contains a
       (local_rows)x(cols) matrix local_A stored as a one-dimensional array. The
       vector b is stored on each process. Each process computes its part of the
       result and then process root collects the results and returns them in y.

       local_rows is the number of rows on my_rank
       cols       is the number of columns on each process
       local_A    is a pointer to the matrix on my_rank
       b          is a pointer to the vector b of size cols
       y          is a pointer to the result on the root process. y is significant only on root. */

    double *local_y = malloc(sizeof(double)*local_rows);

    /* Compute the local matrix times vector */
    compMatrixTimesVector(local_rows, cols, local_A, b, local_y);

    int sendcount = local_rows; /* number of doubles sent by process my_rank */
    int *reccounts = NULL;      /* reccounts[i] is the number of doubles received from process i */
    int *displs = NULL;         /* displs for the MPI_Gatherv function */

    if (my_rank != root) {
        /* Send the sendcount to the root process. reccounts does not matter here. */
        MPI_Gather(&sendcount, 1, MPI_INT, reccounts, 1, MPI_INT, root, comm);
        /* Send the computed results to the root process. The receive buffer,
           reccounts, and displs do not matter here. */
        MPI_Gatherv(local_y, sendcount, MPI_DOUBLE, y, reccounts, displs, MPI_DOUBLE, root, comm);
    }
    else /* we are on the root process */
    {
        /* Gather the receive counts from each process */
        reccounts = malloc(sizeof(int)*p);
        MPI_Gather(&sendcount, 1, MPI_INT, reccounts, 1, MPI_INT, root, comm);

        /* Calculate displs for MPI_Gatherv */
        displs = malloc(sizeof(int)*p);
        int i;
        displs[0] = 0;
        for (i = 1; i < p; i++)
            displs[i] = displs[i-1] + reccounts[i-1];

        /* Gather the results on the root process */
        MPI_Gatherv(local_y, sendcount, MPI_DOUBLE, y, reccounts, displs, MPI_DOUBLE, root, comm);
        free(displs);
        free(reccounts);
    }
    free(local_y);
}
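The helper compMatrixTimesVector called above is not shown in the snippet. A minimal sketch of what it presumably does, assuming the signature implied by the call site (a serial, row-major dense matrix-vector product):

/* Hypothetical local matrix-vector product assumed by parallelMatrixTimesVector:
 * local_A is local_rows x cols in row-major order, b has length cols, and the
 * result local_y has length local_rows. */
void compMatrixTimesVector(int local_rows, int cols, const double *local_A,
                           const double *b, double *local_y)
{
    for (int i = 0; i < local_rows; i++) {
        double sum = 0.0;
        for (int j = 0; j < cols; j++)
            sum += local_A[i*cols + j] * b[j];
        local_y[i] = sum;
    }
}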
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char **argv)
{
    if (MPI_Init(&argc, &argv) != MPI_SUCCESS) {
        fprintf(stderr, "MPI initialization failed.\n");
        return 1;
    }
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 2) {
        fprintf(stderr, "can't play this game alone.\n");
        MPI_Finalize();
        return 1;
    }
    srand(rank + MPI_Wtime());
    int sendcount = rand()%10 + 1;
    char sendbuf[sendcount];
    for (int i = 0; i < sendcount; i++)
        sendbuf[i] = '0' + rank%10;
    fprintf(stderr, "[ %d ] sendcount: %d\n", rank, sendcount);

    int recvcounts[size];
    if (MPI_Allgather(&sendcount, 1, MPI_INT, recvcounts, 1, MPI_INT, MPI_COMM_WORLD)) {
        fprintf(stderr, "MPI_Allgather failed\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    int totalcount = 0;
    for (int i = 0; i < size; i++)
        totalcount += recvcounts[i];
    fprintf(stderr, "[ %d ] totalcount: %d\n", rank, totalcount);

    char recvbuf[totalcount+1];
    memset(recvbuf, 0, sizeof(recvbuf));
    if (rank) {
        /* non-root ranks: the receive arguments are ignored */
        if (MPI_Gatherv(sendbuf, sendcount, MPI_CHAR, NULL, NULL, NULL, MPI_CHAR, 0, MPI_COMM_WORLD)) {
            fprintf(stderr, "MPI_Gatherv failed\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    } else {
        int displs[size];
        displs[0] = 0;
        for (int i = 1; i < size; i++)
            displs[i] = displs[i - 1] + recvcounts[i - 1];
        if (MPI_Gatherv(sendbuf, sendcount, MPI_CHAR, recvbuf, recvcounts, displs, MPI_CHAR, 0, MPI_COMM_WORLD)) {
            fprintf(stderr, "MPI_Gatherv failed\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        fprintf(stderr, "[ %d ] received Gatherv \"%s\"\n", rank, recvbuf);
    }
    if (MPI_Bcast(recvbuf, totalcount, MPI_CHAR, 0, MPI_COMM_WORLD)) {
        fprintf(stderr, "MPI_Bcast failed\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    fprintf(stderr, "[ %d ] received Bcast \"%s\"\n", rank, recvbuf);
    MPI_Finalize();
    return 0;
}
void collect_x_old(double **x, double *xold)
{
  int i, iunk, loc_inode, nunk_per_proc;
  int *index = NULL;
  double *unk_global, *unk_loc;

  /* allocate temporary arrays */
  nunk_per_proc = Nnodes_per_proc*Nunk_per_node;
  unk_loc = (double *) array_alloc (1, nunk_per_proc, sizeof(double));

  for (loc_inode=0; loc_inode < Nnodes_per_proc; loc_inode++ )
    for (iunk=0; iunk<Nunk_per_node; iunk++){
      unk_loc[iunk+Nunk_per_node*loc_inode] = x[iunk][L2B_node[loc_inode]];  /* always use nodal ordering here */
    }

  if (Proc == 0) {
    unk_global = (double *) array_alloc (1, Nunknowns, sizeof(double));
    index = (int *) array_alloc (1, Nnodes, sizeof(int));
  }
  else {
    unk_global = NULL;
    index = NULL;
  }

  /* collect the node numbers from all the processors */
  MPI_Gatherv(L2G_node, Nnodes_per_proc, MPI_INT,
              index, Comm_node_proc, Comm_offset_node,
              MPI_INT, 0, MPI_COMM_WORLD);

  /* collect the unknowns from all the processors */
  MPI_Gatherv(unk_loc, nunk_per_proc, MPI_DOUBLE,
              unk_global, Comm_unk_proc, Comm_offset_unk,
              MPI_DOUBLE, 0, MPI_COMM_WORLD);

  if (Proc == 0){
    for (i=0; i<Nnodes; i++){
      for (iunk=0; iunk<Nunk_per_node; iunk++){
        xold[index[i]*Nunk_per_node+iunk] = unk_global[i*Nunk_per_node+iunk];
      }
    }
    safe_free((void *) &unk_global);
    safe_free((void *) &index);
  }
  safe_free((void *) &unk_loc);
  return;
}
//-----------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------
void Image_Exchanger::exchange_fragment_images(unsigned int* databuf,
                                               int nviewer,
                                               ImageFragment_Tile* ift)
{
//  fprintf(stderr, "**** %s:%s() ****\n", __FILE__, __func__);
#ifdef _DEBUG7
    fprintf(stderr, "**** %s:%s() ****\n", __FILE__, __func__);
#endif

    unsigned int* sendbuf = databuf + m_sbuf_offset;
    unsigned int* recvbuf = databuf + m_rbuf_offset;

    if(nviewer == 1)
    {
        MPI_Gatherv((int*)sendbuf, m_scounts[0], MPI_INT,
                    (int*)recvbuf, m_rcounts, m_rdispls, MPI_INT,
                    0, MPI_COMM_WORLD);
    }
    else
    {
        MPI_Alltoallv((int*)sendbuf, m_scounts, m_sdispls, MPI_INT,
                      (int*)recvbuf, m_rcounts, m_rdispls, MPI_INT,
                      MPI_COMM_WORLD);
    }

    ift->address_fragments(m_rbuf_offset, m_rdispls);
}
void mpi_gatherv_f(char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr) { MPI_Comm c_comm; MPI_Datatype c_sendtype, c_recvtype; int size; OMPI_ARRAY_NAME_DECL(recvcounts); OMPI_ARRAY_NAME_DECL(displs); c_comm = MPI_Comm_f2c(*comm); c_sendtype = MPI_Type_f2c(*sendtype); c_recvtype = MPI_Type_f2c(*recvtype); MPI_Comm_size(c_comm, &size); OMPI_ARRAY_FINT_2_INT(recvcounts, size); OMPI_ARRAY_FINT_2_INT(displs, size); sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf); sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf); recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf); *ierr = OMPI_INT_2_FINT(MPI_Gatherv(sendbuf, OMPI_FINT_2_INT(*sendcount), c_sendtype, recvbuf, OMPI_ARRAY_NAME_CONVERT(recvcounts), OMPI_ARRAY_NAME_CONVERT(displs), c_recvtype, OMPI_FINT_2_INT(*root), c_comm)); }
void invoke() { if (!has_contiguous_data(lhs)) TRIQS_RUNTIME_ERROR << "mpi gather of array into a non contiguous view"; auto c = laz.c; auto recvcounts = std::vector<int>(c.size()); auto displs = std::vector<int>(c.size() + 1, 0); int sendcount = laz.ref.domain().number_of_elements(); auto D = mpi::mpi_datatype<typename A::value_type>(); auto d = laz.domain(); if (laz.all || (laz.c.rank() == laz.root)) resize_or_check_if_view(lhs, d.lengths()); void *lhs_p = lhs.data_start(); const void *rhs_p = laz.ref.data_start(); auto mpi_ty = mpi::mpi_datatype<int>(); if (!laz.all) MPI_Gather(&sendcount, 1, mpi_ty, &recvcounts[0], 1, mpi_ty, laz.root, c.get()); else MPI_Allgather(&sendcount, 1, mpi_ty, &recvcounts[0], 1, mpi_ty, c.get()); for (int r = 0; r < c.size(); ++r) displs[r + 1] = recvcounts[r] + displs[r]; if (!laz.all) MPI_Gatherv((void *)rhs_p, sendcount, D, lhs_p, &recvcounts[0], &displs[0], D, laz.root, c.get()); else MPI_Allgatherv((void *)rhs_p, sendcount, D, lhs_p, &recvcounts[0], &displs[0], D, c.get()); }
void gatherVector(float *localProdVec, int *rowInfo, float *prodVec)
{
    int myRank, numProcs;
    int i;
    int *displs, *recvCount;
    int myRowCount;

    MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myRank);

    displs = (int *) malloc(sizeof(int) * numProcs);
    recvCount = (int *) malloc(sizeof(int) * numProcs);

    if (myRank == ROOT) {
        for (i = 0; i < numProcs; i++) {
            //get offset and count of rows to be received from proc i
            displs[i] = rowInfo[i];
            recvCount[i] = rowInfo[i+numProcs] - rowInfo[i] + 1;
        }
    }

    myRowCount = rowInfo[myRank+numProcs] - rowInfo[myRank] + 1;

    //gather this computed vector at root
    MPI_Gatherv(localProdVec, myRowCount, MPI_FLOAT, prodVec, recvCount, displs,
                MPI_FLOAT, ROOT, MPI_COMM_WORLD);

    free(displs);
    free(recvCount);
}
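The snippet above assumes a particular rowInfo layout: judging from the indexing, rowInfo[i] holds the first row owned by process i and rowInfo[i+numProcs] its last row (inclusive). A tiny self-contained illustration of that assumed layout and the counts/displacements it produces (the numbers are made up):

#include <stdio.h>

int main(void)
{
    /* Hypothetical rowInfo for 3 processes owning rows 0-3, 4-5 and 6-9:
     * first half = first row of each process, second half = last row (inclusive). */
    int numProcs = 3;
    int rowInfo[6] = { 0, 4, 6,
                       3, 5, 9 };
    for (int i = 0; i < numProcs; i++) {
        int displ = rowInfo[i];
        int count = rowInfo[i + numProcs] - rowInfo[i] + 1;
        printf("proc %d: displ=%d count=%d\n", i, displ, count);  /* 0/4, 4/2, 6/4 */
    }
    return 0;
}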
// computes A*mat and stores result on the rank 0 process in matProd (assumes the memory has already been allocated)
void distributedMatMatProd(const double *localRowChunk, const double *mat, double *matProd,
                           const distMatrixInfo *matInfo, const distGatherInfo *eigInfo,
                           scratchMatrices *scratchSpace)
{
    multiplyAChunk(localRowChunk, mat, scratchSpace->Scratch3,
                   matInfo->localrows, matInfo->numcols, eigInfo->numeigs);
    if (matInfo->mpi_rank != 0) {
        MPI_Gatherv(scratchSpace->Scratch3, matInfo->localrows*eigInfo->numeigs, MPI_DOUBLE,
                    NULL, NULL, NULL, MPI_DOUBLE, 0, *(matInfo->comm));
    } else {
        MPI_Gatherv(scratchSpace->Scratch3, matInfo->localrows*eigInfo->numeigs, MPI_DOUBLE,
                    matProd, eigInfo->elementcounts, eigInfo->elementoffsets, MPI_DOUBLE,
                    0, *(matInfo->comm));
    }
}
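multiplyAChunk is not included in the snippet; from the call site it apparently computes the local block product out = localRowChunk (localrows x numcols) times mat (numcols x numeigs). A plausible sketch using CBLAS, with the argument order inferred from the call above:

#include <cblas.h>

/* Hypothetical local block product assumed by distributedMatMatProd:
 * out (rows x neigs) = rowChunk (rows x cols) * mat (cols x neigs), all row-major. */
void multiplyAChunk(const double *rowChunk, const double *mat, double *out,
                    int rows, int cols, int neigs)
{
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                rows, neigs, cols,
                1.0, rowChunk, cols,
                mat, neigs,
                0.0, out, neigs);
}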
double time_gatherv(struct collParams* p)
{
    int i, size2;
    int disp = 0;
    for (i = 0; i < p->nranks; i++) {
        size2 = i % (p->size+1);
        recvcounts[i] = size2;
        rdispls[i] = disp;
        disp += size2;
    }
    MPI_Barrier(MPI_COMM_WORLD);

    size2 = p->myrank % (p->size+1);
    __TIME_START__;
    for (i = 0; i < p->iter; i++) {
        MPI_Gatherv(sbuffer, size2, p->type, rbuffer, recvcounts, rdispls, p->type, p->root, p->comm);
        __BAR__(p->comm);
    }
    __TIME_END__;

    if (check_buffers) {
        check_sbuffer(p->myrank);
        if (p->myrank == p->root) {
            for (i = 0; i < p->nranks; i++) {
                check_rbuffer(rbuffer, rdispls[i], i, 0, recvcounts[i]);
            }
        }
    }

    return __TIME_USECS__ / (double)p->iter;
}
void Coll_ids_at_Master( reax_system *system, storage *workspace, mpi_datatypes *mpi_data )
{
  int i;
  int *id_list;

  MPI_Gather( &system->n, 1, MPI_INT, workspace->rcounts, 1, MPI_INT,
              MASTER_NODE, mpi_data->world );

  if( system->my_rank == MASTER_NODE ){
    workspace->displs[0] = 0;
    for( i = 1; i < system->wsize; ++i )
      workspace->displs[i] = workspace->displs[i-1] + workspace->rcounts[i-1];
  }

  id_list = (int*) malloc( system->n * sizeof(int) );
  for( i = 0; i < system->n; ++i )
    id_list[i] = system->my_atoms[i].orig_id;

  MPI_Gatherv( id_list, system->n, MPI_INT,
               workspace->id_all, workspace->rcounts, workspace->displs,
               MPI_INT, MASTER_NODE, mpi_data->world );

  free( id_list );

#if defined(DEBUG)
  if( system->my_rank == MASTER_NODE ) {
    for( i = 0 ; i < system->bigN; ++i )
      fprintf( stderr, "id_all[%d]: %d\n", i, workspace->id_all[i] );
  }
#endif
}
ubjson::Value Master::ExportSimulation() {
  // This method is a control method, so sends orders from master 0 to other
  // masters
  if (id_ == 0) {
    order_ = Order::EXPORT_SIMULATION;
    MPI_Bcast(&order_, 1, MPI_INT, 0, MasterComm_);
  }

  ubjson::Value local_agents;
  std::vector<ubjson::Value> local_agents_by_types(nb_types_);
  for (AgentHandler &agent_handler : agent_handlers_) {
    agent_handler.GetJsonNodes(local_agents_by_types);
  }
  for (auto &type : agent_type_to_string_) {
    local_agents[type.second] = std::move(local_agents_by_types.at(type.first));
  }

  // Now all the info must be gathered in master 0
  std::ostringstream local_data_stream;
  ubjson::StreamWriter<std::ostringstream> writer(local_data_stream);
  writer.writeValue(local_agents);
  std::string local_data = local_data_stream.str();
  int local_data_size = local_data.size();

  // First master 0 must know how much data it will receive
  std::vector<int> sizes_to_receive;
  if (id_ == 0) {
    sizes_to_receive.resize(nb_masters_);
  }
  MPI_Gather(&local_data_size, 1, MPI_INT,
             sizes_to_receive.data(), 1, MPI_INT, 0, MasterComm_);

  // Receive displacements and a contiguous receive buffer on master 0
  std::vector<int> displs;
  std::vector<char> recv_buffer;
  if (id_ == 0) {
    displs.resize(nb_masters_, 0);
    for (int i = 1; i < nb_masters_; i++) {
      displs[i] = displs[i-1] + sizes_to_receive[i-1];
    }
    recv_buffer.resize(displs[nb_masters_-1] + sizes_to_receive[nb_masters_-1]);
  }

  MPI_Gatherv((void*)local_data.data(), local_data_size, MPI_UNSIGNED_CHAR,
              recv_buffer.data(), sizes_to_receive.data(), displs.data(),
              MPI_UNSIGNED_CHAR, 0, MasterComm_);

  // Storing the results in 'results': split the contiguous receive buffer back
  // into one serialized string per master
  std::vector<std::string> results;
  if (id_ == 0) {
    for (int i = 0; i < nb_masters_; i++) {
      results.emplace_back(recv_buffer.data() + displs[i], sizes_to_receive.at(i));
    }
  }

  // Grouping the results
  ubjson::Value agents;
  for (auto &master_agents : results) {
    ubjson::Value masters_value;
    std::istringstream s(master_agents);
    ubjson::StreamReader<std::istringstream> reader(s);
    masters_value = reader.getNextValue();
    for (auto &type : agent_type_to_string_) {
      for (auto &agent : masters_value[type.second]) {
        agents[type.second].push_back(agent);
      }
    }
  }
  ubjson::Value final;
  final["agents"] = agents;
void Coll_rvecs_at_Master( reax_system *system, storage *workspace, mpi_datatypes *mpi_data, rvec* v ) { MPI_Gatherv( v, system->n, mpi_data->mpi_rvec, workspace->f_all, workspace->rcounts, workspace->displs, mpi_data->mpi_rvec, MASTER_NODE, mpi_data->world ); }
static void gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out, int root) { std::vector<int> counts(comm.size()); Collectives<int,void*>::gather(comm, (int) in.size(), counts, root); std::vector<int> offsets(comm.size(), 0); for (unsigned i = 1; i < offsets.size(); ++i) offsets[i] = offsets[i-1] + counts[i-1]; std::vector<T> buffer(offsets.back() + counts.back()); MPI_Gatherv(Datatype::address(const_cast<T&>(in[0])), in.size(), Datatype::datatype(), Datatype::address(buffer[0]), &counts[0], &offsets[0], Datatype::datatype(), root, comm); out.resize(comm.size()); size_t cur = 0; for (unsigned i = 0; i < (unsigned)comm.size(); ++i) { out[i].reserve(counts[i]); for (unsigned j = 0; j < (unsigned)counts[i]; ++j) out[i].push_back(buffer[cur++]); } }
void mpi_gatherv (void *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, void *recvbuf, MPI_Fint *recvcounts, MPI_Fint *displs, MPI_Fint *recvtype, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *__ierr) { *__ierr = MPI_Gatherv (sendbuf, *sendcount, MPI_Type_f2c (*sendtype), recvbuf, recvcounts, displs, MPI_Type_f2c (*recvtype), *root,MPI_Comm_f2c (*comm)); }
std::vector<int> gather_vectors(std::vector<int>& local_vec, MPI_Comm comm) {
    // get MPI parameters
    int rank;
    int p;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &p);

    // get local size
    int local_size = local_vec.size();

    // init result
    std::vector<int> result;

    // master process: receive results
    if (rank == 0)
    {
        // gather local array sizes; sizes are restricted to `int` by MPI anyway,
        // therefore use int
        std::vector<int> local_sizes(p);
        MPI_Gather(&local_size, 1, MPI_INT, &local_sizes[0], 1, MPI_INT, 0, comm);

        // gatherv to collect all the elements
        int total_size = std::accumulate(local_sizes.begin(), local_sizes.end(), 0);
        result.resize(total_size);

        // get receive displacements
        std::vector<int> displs(p, 0);
        for (int i = 1; i < p; ++i)
            displs[i] = displs[i-1] + local_sizes[i-1];

        // gatherv the vector data to the root
        MPI_Gatherv(&local_vec[0], local_size, MPI_INT,
                    &result[0], &local_sizes[0], &displs[0], MPI_INT, 0, comm);
    }
    // else: send results
    else
    {
        // gather local array sizes
        MPI_Gather(&local_size, 1, MPI_INT, NULL, 1, MPI_INT, 0, comm);

        // send the actual data
        MPI_Gatherv(&local_vec[0], local_size, MPI_INT,
                    NULL, NULL, NULL, MPI_INT, 0, comm);
    }
    return result;
}
FC_FUNC( mpi_gatherv , MPI_GATHERV ) ( void *sendbuf, int *sendcount, int *sendtype, void *recvbuf, int *recvcounts, int *displs, int *recvtype, int *root, int *comm, int *ierror) { *ierror=MPI_Gatherv( mpi_c_in_place(sendbuf), *sendcount, *sendtype, recvbuf, recvcounts, displs, *recvtype, *root, *comm); }
// ****************************************************************************
//  Method: avtImgCommunicator::gatherIotaMetaData
//
//  Purpose:
//      Send the metadata needed by the root node to make decisions
//
//  Arguments:
//      arraySize       : the number of elements being sent
//      allIotaMetadata : the metadata being sent
//
//  Programmer: Pascal Grosset
//  Creation:   July 2013
//
//  Modifications:
//
// ****************************************************************************
void avtImgCommunicator::gatherIotaMetaData(int arraySize, float *allIotaMetadata)
{
#ifdef PARALLEL
    int *recvSizePerProc = NULL;
    float *tempRecvBuffer = NULL;
    int *offsetBuffer = NULL;

    if (my_id == 0){
        tempRecvBuffer = new float[totalPatches*7];   // x7: procId, patchNumber, dims[0], dims[1], screen_ll[0], screen_ll[1], avg_z
        recvSizePerProc = new int[num_procs];
        offsetBuffer = new int[num_procs];

        for (int i=0; i<num_procs; i++){
            if (i == 0)
                offsetBuffer[i] = 0;
            else
                offsetBuffer[i] = offsetBuffer[i-1] + recvSizePerProc[i-1];
            recvSizePerProc[i] = processorPatchesCount[i]*7;
        }
    }

    MPI_Gatherv(allIotaMetadata, arraySize, MPI_FLOAT,
                tempRecvBuffer, recvSizePerProc, offsetBuffer, MPI_FLOAT,
                0, MPI_COMM_WORLD);   // all send to proc 0

    if (my_id == 0){
        allRecvIotaMeta = new iotaMeta[totalPatches];   // allocate space to receive the many patches

        iotaMeta tempPatch;
        for (int i=0; i<totalPatches; i++){
            tempPatch.procId       = (int) tempRecvBuffer[i*7 + 0];
            tempPatch.patchNumber  = (int) tempRecvBuffer[i*7 + 1];
            tempPatch.dims[0]      = (int) tempRecvBuffer[i*7 + 2];
            tempPatch.dims[1]      = (int) tempRecvBuffer[i*7 + 3];
            tempPatch.screen_ll[0] = (int) tempRecvBuffer[i*7 + 4];
            tempPatch.screen_ll[1] = (int) tempRecvBuffer[i*7 + 5];
            tempPatch.avg_z        =       tempRecvBuffer[i*7 + 6];

            int patchIndex = getDataPatchID(tempPatch.procId, tempPatch.patchNumber);
            allRecvIotaMeta[patchIndex] = setIota(tempPatch.procId, tempPatch.patchNumber,
                                                  tempPatch.dims[0], tempPatch.dims[1],
                                                  tempPatch.screen_ll[0], tempPatch.screen_ll[1],
                                                  tempPatch.avg_z);
            all_avgZ_proc0.insert(tempPatch.avg_z);   // insert avg_zs into the set to keep a count of the total number of avg_zs
        }

        if (recvSizePerProc != NULL)
            delete []recvSizePerProc;
        recvSizePerProc = NULL;

        if (offsetBuffer != NULL)
            delete []offsetBuffer;
        offsetBuffer = NULL;

        if (tempRecvBuffer != NULL)
            delete []tempRecvBuffer;
        tempRecvBuffer = NULL;
    }
#endif
}
/* * Class: mpi_Intracomm * Method: Gatherv * Signature: (Ljava/lang/Object;IILmpi/Datatype;Ljava/lang/Object;I[I[ILmpi/Datatype;I)V */ JNIEXPORT void JNICALL Java_mpi_Intracomm_gatherv(JNIEnv *env, jobject jthis, jobject sendbuf, jint sendoffset, jint sendcount, jobject sendtype, jobject recvbuf, jint recvoffset, jintArray recvcounts, jintArray displs, jobject recvtype, jint root) { int id ; jint *rcount = NULL, *dps = NULL; jboolean isCopy ; MPI_Comm mpi_comm = (MPI_Comm) ((*env)->GetLongField(env,jthis,ompi_java.CommhandleID)) ; MPI_Datatype mpi_stype = (MPI_Datatype) ((*env)->GetLongField(env,sendtype,ompi_java.DatatypehandleID)) ; MPI_Datatype mpi_rtype = mpi_stype; int sbaseType = (*env)->GetIntField(env, sendtype, ompi_java.DatatypebaseTypeID) ; int rbaseType = 0; void *sendptr, *recvptr = NULL; void *sbufbase, *rbufbase ; ompi_java_clearFreeList(env) ; MPI_Comm_rank(mpi_comm, &id) ; if(id == root) { rcount=(*env)->GetIntArrayElements(env,recvcounts,&isCopy); dps=(*env)->GetIntArrayElements(env,displs,&isCopy); mpi_rtype = (MPI_Datatype) ((*env)->GetLongField(env,recvtype,ompi_java.DatatypehandleID)) ; rbaseType = (*env)->GetIntField(env, recvtype, ompi_java.DatatypebaseTypeID) ; recvptr = ompi_java_getBufPtr(&rbufbase, env, recvbuf, rbaseType, recvoffset) ; } sendptr = ompi_java_getBufPtr(&sbufbase, env, sendbuf, sbaseType, sendoffset) ; MPI_Gatherv(sendptr, sendcount, mpi_stype, recvptr, (int*) rcount, (int*) dps, mpi_rtype, root, mpi_comm) ; ompi_java_releaseBufPtr(env, sendbuf, sbufbase, sbaseType) ; if (id == root) { ompi_java_releaseBufPtr(env, recvbuf, rbufbase, rbaseType); } if (id == root) { (*env)->ReleaseIntArrayElements(env,recvcounts,rcount,JNI_ABORT); (*env)->ReleaseIntArrayElements(env,displs,dps,JNI_ABORT); } }
void dd_gatherv(gmx_domdec_t *dd,
                int scount, void *sbuf,
                int *rcounts, int *disps, void *rbuf)
{
#ifdef GMX_MPI
    MPI_Gatherv(sbuf, scount, MPI_BYTE,
                rbuf, rcounts, disps, MPI_BYTE,
                DDMASTERRANK(dd), dd->mpi_comm_all);
#endif
}
static void gather(const communicator& comm, const std::vector<T>& in, int root) { Collectives<int,void*>::gather(comm, (int) in.size(), root); MPI_Gatherv(Datatype::address(const_cast<T&>(in[0])), in.size(), Datatype::datatype(), 0, 0, 0, Datatype::datatype(), root, comm); }
void reduce(int words_amount, char* words, int* indexes, int* list)
{
    int nbytes;
    int i, j;
    int* count = NULL;
    int* skip = NULL;
    int* count_bytes = NULL;
    int* skip_bytes = NULL;

    if (world_rank == master) {
        count = calloc(world_size, sizeof(int));
        skip = calloc(world_size, sizeof(int));
        count_bytes = calloc(world_size, sizeof(int));
        skip_bytes = calloc(world_size, sizeof(int));
        divide(world_size, words_amount, words, count, skip);
        bytes(world_size, indexes, count, skip, count_bytes, skip_bytes);

        for (i=words_amount-1, j=world_size-1; i>=0; i--) {
            if (i<skip[j])
                j--;
            indexes[i] -= indexes[skip[j]-1];
        }

        for (i=0; i<world_size; i++)
            debug("R count[%d] = %d, count_bytes[%d] = %d, skip[%d] = %d, skip_bytes[%d] = %d\n",
                  i, count[i], i, count_bytes[i], i, skip[i], i, skip_bytes[i]);
        for (i=0; i<words_amount; i++)
            debug("R indexes[%d] = %d\n", i, indexes[i]);
    }

    /* Distribute the number of words assigned to each process: the counts are
       stored in the count array on the master and received into words_amount
       on every process. */
    MPI_Scatter(count, 1, MPI_INT, &words_amount, 1, MPI_INT, master, MPI_COMM_WORLD);
    debug("[%d] Got %d words to reduce\n", world_rank, words_amount);

    if (world_rank != master) /* non-master processes have to allocate their own indexes buffer */
        indexes = calloc(words_amount, sizeof(int));

    MPI_Scatter(count_bytes, 1, MPI_INT, &nbytes, 1, MPI_INT, master, MPI_COMM_WORLD);
    if (world_rank != master)
        list = calloc(nbytes, sizeof(int));
    debug("[%d] Got %d bytes of list to reduce\n", world_rank, nbytes);

    /* The master keeps its own chunk in the same arrays it scatters from and
       gathers into; MPI forbids aliased send and receive buffers, so the master
       side uses MPI_IN_PLACE (this assumes the master's own displacement is 0,
       i.e. master is rank 0). */
    MPI_Scatterv(list, count_bytes, skip_bytes, MPI_INT,
                 world_rank == master ? MPI_IN_PLACE : (void*)list, nbytes, MPI_INT,
                 master, MPI_COMM_WORLD);
    MPI_Scatterv(indexes, count, skip, MPI_INT,
                 world_rank == master ? MPI_IN_PLACE : (void*)indexes, words_amount, MPI_INT,
                 master, MPI_COMM_WORLD);

    for (i=0; i<words_amount; i++)
        debug("[%d] Index %d\n", world_rank, indexes[i]);

    for (i=0, j=0; i<words_amount; i++) {
        int sum = 0;
        for (; j<indexes[i]; j++)
            sum += list[j];
        indexes[i] = sum;
        debug("[%d] SUM %d = %d\n", world_rank, i, indexes[i]);
    }

    MPI_Gatherv(world_rank == master ? MPI_IN_PLACE : (void*)indexes, words_amount, MPI_INT,
                indexes, count, skip, MPI_INT, master, MPI_COMM_WORLD);
}
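Because reduce() gathers back into the same array the master scattered from, the master side has to use MPI_IN_PLACE rather than aliased send/receive pointers. A minimal standalone sketch of that in-place MPI_Gatherv pattern (the function and variable names here are illustrative, not from the snippet):

#include <mpi.h>

/* Non-root ranks own `mine` elements at the start of `data`; on the root, `data`
 * is the full receive array and the root's own contribution is assumed to already
 * sit at displs[root], so it passes MPI_IN_PLACE as its send buffer. */
void gather_in_place(int *data, int mine, int *counts, int *displs,
                     int root, MPI_Comm comm)
{
    int rank;
    MPI_Comm_rank(comm, &rank);
    if (rank == root)
        MPI_Gatherv(MPI_IN_PLACE, mine, MPI_INT,
                    data, counts, displs, MPI_INT, root, comm);
    else
        MPI_Gatherv(data, mine, MPI_INT,
                    NULL, NULL, NULL, MPI_INT, root, comm);
}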
// Gather the results of the computation at the root
void gather_pres()
{
    MPI_Gatherv(local_pres0 + local_width + 3,  // sendbuf
                1,                              // sendcount
                local_pres_t,                   // sendtype
                pres + imageSize + 3,           // recvbuf
                counts,                         // recvcounts
                displs,                         // displs
                pres_and_diverg_t,              // recvtype
                ROOT,                           // root
                cart_comm);                     // comm
}
void parallelMatrixTimesVector(int num_local_rows, int cols, double *local_A, double *b,
                               double *y, int root, int my_rank, int p, MPI_Comm comm)
{
    /* This function performs parallel matrix-vector multiplication of a matrix A
       times vector b. The matrix is distributed by rows. Each process contains a
       (num_local_rows)x(cols) matrix local_A stored as a one-dimensional array. The
       vector b is stored on each process. Each process computes its part of the
       result and then process root collects the results and returns them in y.

       num_local_rows is the number of rows on my_rank
       cols           is the number of columns on each process
       local_A        is a pointer to the matrix on my_rank
       b              is a pointer to the vector b of size cols
       y              is a pointer to the result on the root process. y is significant only on root. */

    /* Allocate memory for the local result on my_rank */
    double *local_y = malloc(sizeof(double)*num_local_rows);

    /* Compute the local matrix times vector */
    compMatrixTimesVector(num_local_rows, cols, local_A, b, local_y);

    int *reccounts = NULL; /* reccounts[i] is the number of doubles to be received from process i */
    int *displs = NULL;    /* displs for the MPI_Gatherv function */

    if (my_rank == root)
        reccounts = malloc(sizeof(int)*p);

    /* Gather num_local_rows from each process */
    MPI_Gather(&num_local_rows, 1, MPI_INT, reccounts, 1, MPI_INT, root, comm);

    if (my_rank == root) {
        /* Calculate displs for MPI_Gatherv */
        int i;
        displs = malloc(sizeof(int)*p);
        displs[0] = 0;
        for (i = 1; i < p; i++)
            displs[i] = displs[i-1] + reccounts[i-1];
    }

    MPI_Gatherv(local_y, num_local_rows, MPI_DOUBLE,
                y, reccounts, displs, MPI_DOUBLE, root, comm);

    if (my_rank == root) {
        free(displs);
        free(reccounts);
    }
    free(local_y);
}
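The displacement computation that recurs throughout these examples is just an exclusive prefix sum of the receive counts. A small reference helper (the function name is ours, not from any of the snippets) with a worked example: for counts = {3, 1, 4} it produces displs = {0, 3, 4}, so rank 1's data lands right after rank 0's three elements.

/* Exclusive prefix sum of receive counts: the displacement rule used by the
 * MPI_Gatherv calls above. counts = {3, 1, 4} yields displs = {0, 3, 4}. */
void compute_displs(const int *counts, int *displs, int p)
{
    displs[0] = 0;
    for (int i = 1; i < p; i++)
        displs[i] = displs[i-1] + counts[i-1];
}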
int PLA_MPI_Gatherv( void * sendbuf, int scount, MPI_Datatype stype, void * recvbuf, int * rcounts, int * displs, MPI_Datatype rtype, int root, MPI_Comm comm) { return (MPI_Gatherv ( sendbuf, scount, stype, recvbuf, rcounts, displs, rtype, root, comm )); }
void LBNSCommunicator::gatherArray( int& count, std::vector<int>& data_size, std::vector<int>& data_displ, std::vector<int>& data){ int rank=0; int com_size=0; MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Comm_size(MPI_COMM_WORLD,&com_size); if(data_size.size()==0){ count=data.size(); data_size.resize(1); if(rank==_maxSizeCommunicators[_index*2+1]) data_size.resize(com_size); MPI_Gather(&count,1, MPI_INT, &data_size[0], 1, MPI_INT,_maxSizeCommunicators[_index*2+1], MPI_COMM_WORLD); if(rank==_maxSizeCommunicators[_index*2+1]){ data_displ.resize(com_size); data_displ[rank]=0; for(int i=0;i<com_size;i++){ if(i!=rank){ data_displ[i]=count; count+=data_size[i]; } } } } data.resize(count); if(rank==_maxSizeCommunicators[_index*2+1]) MPI_Gatherv(MPI_IN_PLACE,0, MPI_INT,&data[0], &data_size[0],&data_displ[0], MPI_INT,_maxSizeCommunicators[_index*2+1], MPI_COMM_WORLD); else MPI_Gatherv(&data[0],count, MPI_INT,&data[0], &data_size[0],&data_displ[0], MPI_INT,_maxSizeCommunicators[_index*2+1], MPI_COMM_WORLD); }
void dump_time_field(char* file_prefix, grid_parms grid, double field)
{
    MPI_Status status;
    MPI_Offset displacement = 0;
    MPI_File fw;
    char* buffer = (char*)checked_malloc(NUM_DBL_TO_CHAR_BYTES * sizeof(char), SRC_LOC);
    char* write_buffer;
    int root = 0;
    char filename[50];

    int length = sprintf(buffer, "%2.12lf\n", field);

    int *recv_count = (int*) checked_malloc(grid.num_ranks_branch * sizeof(int), SRC_LOC);
    int *disp = (int*) checked_malloc(grid.num_ranks_branch * sizeof(int), SRC_LOC);

    /// Gathering the lengths of buffer from each MPI process.
    CHECK_MPI_ERROR(MPI_Gather(&length, 1, MPI_INT, recv_count, 1, MPI_INT, root, grid.cart_comm));

    int total_buffer_length = 0;
    for (int i = 0; i < grid.num_ranks_branch; i++) {
        disp[i] = total_buffer_length;
        total_buffer_length += recv_count[i];
    }

    if (grid.rank_branch == 0) {
        write_buffer = (char*) checked_malloc(total_buffer_length * sizeof(char), SRC_LOC);
    }

    // Gathering the buffers from all MPI processes.
    CHECK_MPI_ERROR(MPI_Gatherv(buffer, length, MPI_CHAR, write_buffer, recv_count, disp, MPI_CHAR, root, grid.cart_comm));

    if (grid.rank_branch == 0) {
        sprintf(filename, "%s/%s_%d_%d.txt", grid.time_profiling_dir, file_prefix, grid.domain_index, grid.branch_tag);

        CHECK_MPI_ERROR(MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fw));
        CHECK_MPI_ERROR(MPI_File_write_at(fw, 0, write_buffer, total_buffer_length, MPI_CHAR, &status));
        MPI_File_close(&fw);
    }

    if (grid.rank_branch == 0) {
        free(write_buffer);
    }
    free(recv_count);
    free(buffer);
    free(disp);
}
PetscErrorCode writeProfileSurfaceScalarData(char *fileName, PetscScalar *arr, PetscInt numValsPerProfile, PetscTruth appendToFile) { PetscErrorCode ierr; PetscScalar *tmpArr; PetscInt *displs, *rcounts, cumpro; PetscInt ipro; size_t m1, m2; /* off_t off, offset; */ PetscViewer fd; PetscInt fp; /* PetscInt iShift; */ PetscMPIInt numProcessors, myId; ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&myId);CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&numProcessors);CHKERRQ(ierr); m1 = numValsPerProfile*totalNumProfiles*sizeof(PetscScalar); m2 = numProcessors*sizeof(PetscInt); /* Allocate memory for temporary arrays */ ierr = PetscMalloc(m1,&tmpArr);CHKERRQ(ierr); ierr = PetscMalloc(m2,&displs);CHKERRQ(ierr); ierr = PetscMalloc(m2,&rcounts);CHKERRQ(ierr); cumpro=0; for (ipro=1; ipro<=numProcessors; ipro++) { displs[ipro-1]=numValsPerProfile*cumpro; rcounts[ipro-1]=numValsPerProfile*gNumProfiles[ipro-1]; cumpro = cumpro + gNumProfiles[ipro-1]; /* ierr=PetscPrintf(PETSC_COMM_WORLD,"cumpro=%d, displs=%d\n",cumpro,displs[ipro-1],rcounts[ipro-1]);CHKERRQ(ierr); */ } MPI_Gatherv(arr,numValsPerProfile*lNumProfiles,MPI_DOUBLE,tmpArr,rcounts,displs,MPI_DOUBLE,0, PETSC_COMM_WORLD); if (myId==0) { /* this shouldn't really be necessary, but without it, all processors seem to be writing in append mode */ if (appendToFile) { ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,fileName,FILE_MODE_APPEND,&fd);CHKERRQ(ierr); } else { ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,fileName,FILE_MODE_WRITE,&fd);CHKERRQ(ierr); } ierr = PetscViewerBinaryGetDescriptor(fd,&fp);CHKERRQ(ierr); ierr = PetscBinaryWrite(fp,tmpArr,numValsPerProfile*totalNumProfiles,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); ierr = PetscViewerDestroy(fd);CHKERRQ(ierr); } ierr = PetscFree(tmpArr);CHKERRQ(ierr); ierr = PetscFree(displs);CHKERRQ(ierr); ierr = PetscFree(rcounts);CHKERRQ(ierr); return 0; }
void generateSolutions() { int i,j,k; sizeSol=0; solutionCounter = 0; MPI_Allgather(&my_frontSize,1,MPI_INT, frontSize_all,1,MPI_INT, MPI_COMM_WORLD); int count=0; for(i=0;i<NP;i++) { if(x_rank[i]==1) { memcpy(&collection_nonDom_x[count*D], &x_variable[i*D],D*sizeof(double)); memcpy(&collection_nonDom_fit[count*Nobj], &x_fitness[i*Nobj],Nobj*sizeof(double)); count++; } } update_recv_disp(frontSize_all,D); MPI_Gatherv(collection_nonDom_x,my_frontSize*D,MPI_DOUBLE, finalSolutions,recv_size,disp_size,MPI_DOUBLE, 0,MPI_COMM_WORLD); update_recv_disp(frontSize_all,Nobj); MPI_Gatherv(collection_nonDom_fit,my_frontSize*Nobj,MPI_DOUBLE, finalFitness,recv_size,disp_size,MPI_DOUBLE, 0,MPI_COMM_WORLD); for(i=0;i<numSpecies;i++) { sizeSol += frontSize_all[i]; } if(mpi_rank==0) nonDominatedSorting(finalFitness,finalRank,sizeSol,sizeSol); }
// computes means of the rows of A, subtracts them from A, and returns them in meanVec on the root process
// assumes memory has already been allocated for meanVec
void computeAndSubtractRowMeans(double *localRowChunk, double *meanVec, distMatrixInfo *matInfo)
{
    int mpi_rank = matInfo->mpi_rank;
    int numcols = matInfo->numcols;
    int localrows = matInfo->localrows;
    int *rowcounts = matInfo->rowcounts;
    int *rowoffsets = matInfo->rowoffsets;
    MPI_Comm *comm = matInfo->comm;

    double *onesVec = (double *) malloc( numcols * sizeof(double));
    double *localMeanVec = (double *) malloc( localrows * sizeof(double));
    for(int idx = 0; idx < numcols; idx = idx + 1) {
        onesVec[idx] = 1;
    }
    cblas_dgemv(CblasRowMajor, CblasNoTrans, localrows, numcols, 1.0/((double)numcols),
                localRowChunk, numcols, onesVec, 1, 0, localMeanVec, 1);
    cblas_dger(CblasRowMajor, localrows, numcols, -1.0, localMeanVec, 1, onesVec, 1,
               localRowChunk, numcols);

    if (mpi_rank != 0) {
        MPI_Gatherv(localMeanVec, localrows, MPI_DOUBLE, NULL, NULL, NULL, MPI_DOUBLE, 0, *comm);
    } else {
        MPI_Gatherv(localMeanVec, localrows, MPI_DOUBLE, meanVec, rowcounts, rowoffsets, MPI_DOUBLE, 0, *comm);
    }
    free(onesVec);
    free(localMeanVec);
}
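The cblas_dgemv call above computes all the row means at once: with alpha = 1/numcols and a vector of ones, y = (1/numcols) * A * 1 is exactly the vector of per-row averages, which cblas_dger then subtracts back out. A tiny self-contained check of that identity (the matrix values are made up):

#include <stdio.h>
#include <cblas.h>

int main(void)
{
    double A[6]    = { 1, 2, 3,
                       4, 5, 6 };   /* 2 x 3, row-major */
    double ones[3] = { 1, 1, 1 };
    double means[2];
    /* means = (1/3) * A * ones -> the row averages */
    cblas_dgemv(CblasRowMajor, CblasNoTrans, 2, 3,
                1.0 / 3.0, A, 3, ones, 1, 0.0, means, 1);
    printf("%g %g\n", means[0], means[1]);   /* expected: 2 5 */
    return 0;
}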
int MPI_Gatherv_Wrapper(void* sendbuf, int sendcount, MPI_Datatype sendtype,
                        void* recvbuf, int *recvcount, int *recvdisp, MPI_Datatype recvtype,
                        int root, MPI_Comm comm)
{
#ifdef COMMPI
  char *me = ft_mpi_routine_names[MPI_Gatherv_cntr];
  int ierr;
  FT_INITIALIZE(me, ft_global_ht)
  ft_mpi_cntrs[MPI_Total_cntr]++;
  ft_mpi_cntrs[MPI_Gatherv_cntr]++;
  ierr = MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvdisp, recvtype, root, comm);
  FT_FINALIZE(me, ft_global_ht, 1)
  return(ierr);
#else
  return(0);
#endif
}