/*
 * Hand out sub-blocks of the global image to every process of the grid.
 *
 * For each grid position (j in [0,dims[0]), i in [0,dims[1])) a synchronous
 * non-blocking send of one derived datatype is started from
 * globalI[(i*nxp)*ny + j*nyp] to rank dims[1]*j + i, using that rank as the
 * message tag. The datatype is picked from scatter[] by position:
 *   scatter[4] - last i and last j
 *   scatter[1] - last i only
 *   scatter[0] - last j only
 *   scatter[3] - everything else
 * For position (0,0) the caller also receives nxp*nyp floats from rank 0
 * (tag = rank) into localB, so that its own send can complete. Each send is
 * waited on before the next one is started.
 */
void scatter_data( void *globalI, void *localB, int ny, int nxp, int nyp, int dims[], int rank, int comm2d, MPI_Datatype scatter[])
{
    float *image = (float *) globalI;
    float *tile  = (float *) localB;
    MPI_Request pending;
    MPI_Status  outcome;
    int i, j;

    for (i = 0; i < dims[1]; ++i) {
        for (j = 0; j < dims[0]; ++j) {
            int target  = (dims[1]*j) + i;            /* destination rank, also the tag */
            int lastI   = (i == dims[1]-1);
            int lastJ   = (j == dims[0]-1);
            int typeIdx;

            if (lastI) {
                typeIdx = lastJ ? 4 : 1;
            } else {
                typeIdx = lastJ ? 0 : 3;
            }

            MPI_Issend(&image[((i*nxp)*ny)+(j*nyp)], 1, scatter[typeIdx],
                       target, target, comm2d, &pending);

            /* The very first block is addressed to the sender itself. */
            if (i == 0 && j == 0) {
                MPI_Recv(tile, nxp*nyp, MPI_FLOAT, 0, rank, comm2d, &outcome);
            }

            MPI_Wait(&pending, &outcome);
        }
    }
}
/*
 * One halo exchange step: start a synchronous non-blocking send of one
 * `type` element from `sendAt` to `to`, receive one `type` element from
 * `from` into `recvAt`, then wait for the send to complete.
 */
static void send_halos_exchange(float *sendAt, float *recvAt, MPI_Datatype type,
                                int to, int from, int tag, int comm2d)
{
    MPI_Request pending;
    MPI_Status  outcome;

    MPI_Issend(sendAt, 1, type, to, tag, comm2d, &pending);
    MPI_Recv(recvAt, 1, type, from, tag, comm2d, &outcome);
    MPI_Wait(&pending, &outcome);
}

/*
 * Exchange halo data of the local array with the four neighbours.
 *
 * localO is treated as a flat float array with a row length of nyp+2.
 * Four phases run one after another (down, up, right, left); the first two
 * use the `mrows` datatype and the last two use `mcols`. All messages use
 * `tag` on communicator `comm2d`.
 */
void send_halos(void *localO, int left, int right, int up, int down, int comm2d, int tag, int nxp, int nyp, MPI_Datatype mrows, MPI_Datatype mcols)
{
    float *grid = (float *) localO;
    const int rowLen = nyp + 2;   /* elements per row including the two halo cells */

    /* send down, receive from up */
    send_halos_exchange(grid + rowLen + nyp, grid + rowLen,
                        mrows, down, up, tag, comm2d);

    /* send up, receive from down */
    send_halos_exchange(grid + rowLen + 1, grid + rowLen + nyp + 1,
                        mrows, up, down, tag, comm2d);

    /* send right, receive from left */
    send_halos_exchange(grid + (nxp*rowLen) + 1, grid + 1,
                        mcols, right, left, tag, comm2d);

    /* send left, receive from right */
    send_halos_exchange(grid + rowLen + 1, grid + (nxp+1)*rowLen + 1,
                        mcols, left, right, tag, comm2d);
}
/*
 * MPI_Issend variant that accepts an MPI_Count element count.
 *
 * Counts that do not exceed bigmpi_int_max are forwarded directly to
 * MPI_Issend. Larger counts are wrapped in a single derived datatype built by
 * BigMPI_Type_contiguous, which is committed, used for a one-element send and
 * then freed.
 *
 * Returns the return code of the MPI_Issend call.
 */
int MPIX_Issend_x(BIGMPI_CONST void *buf, MPI_Count count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)
{
    MPI_Datatype bigtype;
    int rc;

    /* Common case: the count is representable as an int. */
    if (likely (count <= bigmpi_int_max )) {
        return MPI_Issend(buf, (int)count, datatype, dest, tag, comm, request);
    }

    /* Large case: describe the whole buffer as one element of a big type. */
    BigMPI_Type_contiguous(0,count, datatype, &bigtype);
    MPI_Type_commit(&bigtype);
    rc = MPI_Issend(buf, 1, bigtype, dest, tag, comm, request);
    MPI_Type_free(&bigtype);

    return rc;
}
/*
 * Deliver a sparse row to the owner of its starting column.
 *
 * If the column is local, the row is handed to the matching VPU through
 * vpu_recv_row() and 0 is returned. Otherwise, when built WITH_MPI, a new
 * request slot and a new row record are appended to the outbox, and the row
 * is sent as raw bytes with MPI_Issend; the MPI return code is returned.
 * Without MPI a non-local column is a programming error: the function
 * asserts and returns -1.
 */
int comm_send_sparse_row(const switchboard_t* sb, outbox_t* outbox, sparse_row_t* row)
{
  const coord_t dest_column = row->starting_column_;

  if (is_local(sb, dest_column)) {
    vpu_t* target = vpu_for_column(sb, dest_column);
    assert(NULL != target);
    vpu_recv_row(target, row, ROW_SPARSE);
    return 0;
  }

#ifdef WITH_MPI
  {
    /* Remote owner: reserve bookkeeping slots in the outbox first. */
    MPI_Request* pending = requests_list_extend1(&(outbox->requests));
    row_t* record = rows_list_extend1(&(outbox->rows));

    /* The row is remembered here so it can be freed once the request completes. */
    record->data = row;
    record->kind = ROW_SPARSE;

    /* XXX: the row travels as a byte string, which assumes a homogeneous architecture. */
    return MPI_Issend(row,
                      sparse_row_ub(row) - sparse_row_lb(row),
                      MPI_BYTE,
                      owner(sb, dest_column),
                      TAG_ROW_SPARSE,
                      MPI_COMM_WORLD,
                      pending);
  }
#else
  /* Should not happen: without MPI every column must be local. */
  assert(false);
  return -1;
#endif /* WITH_MPI */
}
/*
   PetscCommBuildTwoSided_Ibarrier - discover who sends to this process, using
   synchronous sends, probing and a non-blocking barrier.

   Input:
     comm    - communicator (passed through PetscCommDuplicate, which also yields the tag)
     count   - number of dtype entries sent to each destination
     dtype   - MPI datatype of the entries; a nonzero lower bound is rejected
     nto     - number of destination ranks
     toranks - destination ranks, length nto
     todata  - send data; entry i starts at byte offset count*unitbytes*i

   Output:
     nfrom     - number of messages received
     fromranks - source rank of every received message (extracted from a segmented buffer)
     fromdata  - received data, count entries per message (extracted from a segmented buffer)

   Every error code is checked with CHKERRQ, so any failure returns early.
*/
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  PetscSegBuffer segrank,segdata;

  PetscFunctionBegin;
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  /* unitbytes is the extent of one dtype entry; it is used as the byte stride in todata */
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  tdata = (char*)todata;
  ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
  /* Start one synchronous-mode send per destination */
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  /* Growable buffers for the (initially unknown) set of incoming ranks and payloads */
  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* Progress loop: runs until the non-blocking barrier has completed */
  for (done=0; !done; ) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) {                 /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      ierr      = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
      ierr      = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      nrecvs++;
    }
    if (barrier == MPI_REQUEST_NULL) {
      /* Barrier not started yet: start it once all of our own sends have completed */
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
#if defined(PETSC_HAVE_MPI_IBARRIER)
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#elif defined(PETSC_HAVE_MPIX_IBARRIER)
        ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#endif
        ierr = PetscFree(sendreqs);CHKERRQ(ierr);
      }
    } else {
      /* Barrier already started: the loop ends when it completes */
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  *nfrom = nrecvs;
  /* Hand the collected ranks and data to the caller and release the segmented buffers */
  ierr   = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
  ierr   = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr   = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
  ierr   = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  ierr   = PetscCommDestroy(&comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/// Register a detached request and start its size message.
///
/// SEND requests post a synchronous non-blocking send of `r->count` unless a
/// send to the same (destination, tag) pair is already in flight; in that case
/// the request is appended to the waiting queue of that pair instead.
/// RECV requests post a non-blocking receive into `r->count`.
/// In every case the request ends up on the `detached` list.
void MpiRequestPool::pushDetachedRequest(Request* r) {
  r->sizeReqDone = false;

  if (r->type == SEND) {
    assert(r->d->tag != 0);

    // At most one concurrent send per (to, tag) pair; later ones are queued.
    const auto key = std::make_pair(r->to, r->d->tag);
    const bool busy = sendsInFlight.find(key) != sendsInFlight.end();
    if (busy) {
      waiting[key].push_back(r);
    } else {
      sendsInFlight.insert(key);
      int ierr = MPI_Issend(&r->count, 1, MPI_UNSIGNED_LONG_LONG, r->to,
                            r->d->tag, MPI_COMM_WORLD, &r->req);
      assert(!ierr);
    }
  } else if (r->type == RECV) {
    int ierr = MPI_Irecv(&r->count, 1, MPI_UNSIGNED_LONG_LONG, r->from,
                         r->d->tag, MPI_COMM_WORLD, &r->req);
    assert(!ierr);
  }

  detached.push_back(r);
}
//---------------------------------------------------------------- void Trans(void* addr, MPI_Datatype mpi_dt, SCUDir dir, SCUXR sendrx){ MPI_Request request; // Determine the NN in the given direction: int nnPE = nnList[dir]; // Initiate the send or recieve: if( sendrx == SCU_SEND ) MPI_Issend( addr, // base-address of the data 1, // Number of items to send, one datatype mpi_dt, // MPI datatype to send nnPE, // ID of destination PE dir, // Message-tag based on dirn Cart_Comm, // The communicator &request // RETURNS, the request handle ); else MPI_Irecv( addr, // base-address of the data 1, // Number of items to recieve, one struct mpi_dt, // MPI datatype to recv nnPE, // ID of source PE dir-((dir%2)*2-1), // Tag based on dirn Cart_Comm, // The communicator &request // RETURNS, the request handle ); // Add the new request to the req. handler: ReqMan->AddRequest(request); return; }
/*
 * Synchronous send built on the event queue: start an MPI_Issend and then
 * block in event_wait() on that request. Aborts with the message
 * "MPI_Issend" if the send cannot be started.
 */
void event_Ssend(event_queue_t queue,void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm)
{
    MPI_Request pending;
    MPI_Status  outcome;

    if (MPI_Issend(buf,count,datatype,dest,tag,comm,&pending) != MPI_SUCCESS) {
        Abort("MPI_Issend");
    }
    event_wait(queue,&pending,&outcome);
}
/*
 * Start a synchronous-mode non-blocking send and register it with the queue.
 *
 * The request is stored in the slot at index queue->pending, together with
 * the callback and its context; queue->pending is then incremented. A NULL
 * callback is replaced by null_cb. Aborts with the message "MPI_Issend" if
 * the send cannot be started.
 */
void event_Issend(event_queue_t queue,void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm,event_callback cb,void*context)
{
    int res;

    ensure_access(queue->man,queue->pending);

    res = MPI_Issend(buf,count,datatype,dest,tag,comm,&queue->request[queue->pending]);
    if (res != MPI_SUCCESS) {
        Abort("MPI_Issend");
    }

    /* A NULL call-back is not allowed in the table, so substitute the no-op. */
    if (cb) {
        queue->cb[queue->pending]=cb;
    } else {
        queue->cb[queue->pending]=null_cb;
    }
    queue->context[queue->pending]=context;
    queue->pending++;
}
int main(int argc, char **argv){ int rank, size, *buf, **bufs, i, sum = 0, count = 0; MPI_Status status; MPI_Request request; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); bufs = malloc(sizeof(int*) * size); buf = malloc(sizeof(int)); buf[0] = rank; bufs[0] = buf; MPI_Issend(buf, 1, MPI_INT, neighbour(rank, size), 0, MPI_COMM_WORLD, &request); printf("Processor %d sent message with value %d\n", rank, buf[0]); while (count < size){ count++; buf = malloc(sizeof(int)); bufs[count] = buf; MPI_Recv(buf, 1, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); if (buf[0] == rank) { printf("Processor %d received self message\n", rank); } else { MPI_Issend(buf, 1, MPI_INT, neighbour(rank, size), 0, MPI_COMM_WORLD, &request); printf("Processor %d received and sent message with value %d\n", rank, buf[0]); } sum += buf[0]; } for (i = 0; i < size; i++){ free(bufs[i]); } free(bufs); printf("Processor %d stoped, sum = %d\n", rank, sum); MPI_Finalize(); return 0; }
/*
 * Fortran binding for MPI_Issend: converts the Fortran handles and integer
 * arguments to their C counterparts, calls MPI_Issend, stores its return code
 * in *__ierr and writes the Fortran handle of the new request to *request.
 */
void mpi_issend (void *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *__ierr)
{
  MPI_Request  c_request;
  MPI_Datatype c_type = MPI_Type_f2c (*datatype);
  MPI_Comm     c_comm = MPI_Comm_f2c (*comm);

  *__ierr = MPI_Issend (buf, (int) *count, c_type,
                        (int) *dest, (int) *tag, c_comm, &c_request);

  *request = MPI_Request_c2f (c_request);
}
int main (int argc, char* argv[]){ int rank, size; int my_prev, my_next; int sum; /* Message information */ int sendbuf, recvbuf; int my_tag = 9999; MPI_Request reqr; MPI_Request reqs; MPI_Status status[1]; /* loop counter */ int n; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); /* Work out neighbouring ranks */ my_prev = rank - 1; if (my_prev < 0) my_prev = my_prev + size; my_next = (rank + 1) % size; /* Initialise the sum and the message for all ranks */ sum = 0; sendbuf = rank; for (n = 0; n < size; n++) { MPI_Irecv(&recvbuf, 1, MPI_INT, my_prev, my_tag, MPI_COMM_WORLD, &reqr); MPI_Issend(&sendbuf, 1, MPI_INT, my_next, my_tag, MPI_COMM_WORLD, &reqs); /* When the receive has completed, we can use the contents of recvbuf */ MPI_Wait(&reqr, status); sum = sum + recvbuf; /* When the send has completed, we can safely re-use the send buffer */ MPI_Wait(&reqs, status); sendbuf = recvbuf; } /* Display the result on all ranks, along with the correct answer */ printf("Rank %2d has sum of ranks %d; Answer = %d\n",rank, sum, (size-1)*size/2); MPI_Finalize(); return 0; }
/*
   PetscCommBuildTwoSided_Ibarrier - discover who sends to this process, using
   synchronous sends, probing and a non-blocking barrier.

   Input:
     comm    - communicator; the message tag is obtained from PetscCommGetNewTag
     count   - number of dtype entries sent to each destination
     dtype   - MPI datatype of the entries
     nto     - number of destination ranks
     toranks - destination ranks, length nto
     todata  - send data; entry i starts at byte offset count*unitbytes*i

   Output:
     nfrom     - number of messages received
     fromranks - source rank of every received message (extracted from a segmented buffer)
     fromdata  - received data, count entries per message (extracted from a segmented buffer)

   Every error code is checked with CHKERRQ, so any failure returns early.
*/
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,unitbytes,done;
  PetscInt       i;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  PetscSegBuffer segrank,segdata;

  PetscFunctionBegin;
  ierr  = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
  /* unitbytes is the size of one dtype entry; it is used as the byte stride in todata */
  ierr  = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
  tdata = (char*)todata;
  ierr  = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr);
  /* Start one synchronous-mode send per destination */
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  /* Growable buffers for the (initially unknown) set of incoming ranks and payloads */
  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* Progress loop: runs until the non-blocking barrier has completed */
  for (done=0; !done; ) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) {                 /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      ierr      = PetscSegBufferGet(&segrank,1,&recvrank);CHKERRQ(ierr);
      ierr      = PetscSegBufferGet(&segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      nrecvs++;
    }
    if (barrier == MPI_REQUEST_NULL) {
      /* Barrier not started yet: start it once all of our own sends have completed */
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
        ierr = PetscFree(sendreqs);CHKERRQ(ierr);
      }
    } else {
      /* Barrier already started: the loop ends when it completes */
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  *nfrom = nrecvs;
  /* Hand the collected ranks and data to the caller and release the segmented buffers */
  ierr   = PetscSegBufferExtractAlloc(&segrank,fromranks);CHKERRQ(ierr);
  ierr   = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr   = PetscSegBufferExtractAlloc(&segdata,fromdata);CHKERRQ(ierr);
  ierr   = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/**
 * vsg_packed_msg_issend:
 * @pm: a #VsgPackedMsg.
 * @dst: the destination task id.
 * @tag: an integer message tag.
 * @request: location where the request handle of the send is stored.
 *
 * Starts a non blocking, synchronous mode send of the first @pm->position
 * bytes of the packed buffer of @pm to @dst, using @tag on the communicator
 * of @pm. The send is recorded in the message trace first. A failing MPI call
 * is reported through vsg_mpi_error_output().
 */
void vsg_packed_msg_issend (VsgPackedMsg *pm, gint dst, gint tag,
                            MPI_Request *request)
{
  gint status;

  _trace_write_msg_send (pm, "issend", dst, tag);

  status = MPI_Issend (pm->buffer, pm->position, MPI_PACKED, dst, tag,
                       pm->communicator, request);

  if (status == MPI_SUCCESS)
    return;

  vsg_mpi_error_output (status);
}
/// Start a synchronous-mode non-blocking send of `x` to `dest` with `tag` on
/// `comm` and return the request that tracks it. The buffer address, element
/// count and MPI datatype are taken from mpi_datatype<T>. In builds with
/// DIY_NO_MPI the call is reported through DIY_UNSUPPORTED_MPI_CALL instead.
request operator()(MPI_Comm comm, int dest, int tag, const T& x) const
{
#ifdef DIY_NO_MPI
  (void) comm;
  (void) dest;
  (void) tag;
  (void) x;
  DIY_UNSUPPORTED_MPI_CALL(MPI_Issend);
#else
  typedef mpi_datatype<T> Traits;

  request pending;
  MPI_Issend((void*) Traits::address(x),
             Traits::count(x),
             Traits::datatype(),
             dest, tag, comm,
             &pending.r);
  return pending;
#endif
}
/*
 * Fortran entry point for MPI_Issend. The Fortran arguments are converted to
 * C values, MPI_Issend is called, and its return code is stored in *ierr.
 * The Fortran request handle is written to *request only when the call
 * succeeded.
 */
void mpi_issend_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)
{
    MPI_Request  c_req;
    MPI_Datatype c_type = MPI_Type_f2c(*datatype);
    MPI_Comm     c_comm = MPI_Comm_f2c (*comm);

    *ierr = OMPI_INT_2_FINT(MPI_Issend(OMPI_F2C_BOTTOM(buf),
                                       OMPI_FINT_2_INT(*count),
                                       c_type,
                                       OMPI_FINT_2_INT(*dest),
                                       OMPI_FINT_2_INT(*tag),
                                       c_comm,
                                       &c_req));

    if (MPI_SUCCESS != OMPI_FINT_2_INT(*ierr)) {
        return;
    }
    *request = MPI_Request_c2f(c_req);
}
/// Send `message` to rank `to_whom` (tag 0, MPI_COMM_WORLD) with a
/// synchronous-mode send.
///
/// Messages already waiting for this process are moved to `msg_backlog`
/// before the send is started, and again while the send is in progress.
///
/// `message` is a by-value parameter, so its storage is destroyed when this
/// function returns. An MPI send buffer must stay untouched until the send
/// has completed, therefore the send is driven to completion here instead of
/// being left pending (the previous version returned while MPI could still
/// read the destroyed string).
///
/// @return the request handle after completion. MPI_Test resets a completed
///         request to MPI_REQUEST_NULL, so waiting on or testing the returned
///         handle succeeds immediately.
///
/// NOTE(review): draining inside the wait loop assumes is_message()/get_str()
/// are safe to call while a send is outstanding, as they already were before
/// the send - confirm.
MPI_Request send_str(std::string message, int to_whom)
{
    // Retrieve anything already addressed to us first.
    while (is_message()) {
        msg_backlog.push_back(get_str(MPI_ANY_SOURCE, true));
    }

    MPI_Request request;
    MPI_Issend(const_cast<char*>(message.c_str()),
               static_cast<int>(message.size()),
               MPI_CHAR, to_whom, 0, MPI_COMM_WORLD, &request);

    // Keep `message` alive until the receiver has matched the send. Incoming
    // messages are still drained so that two ranks sending to each other do
    // not block one another.
    int delivered = 0;
    while (!delivered) {
        MPI_Test(&request, &delivered, MPI_STATUS_IGNORE);
        if (!delivered) {
            while (is_message()) {
                msg_backlog.push_back(get_str(MPI_ANY_SOURCE, true));
            }
        }
    }
    return request;
}
/*
 * Send the pair (val, finish) to `dest` with a synchronous-mode send (tag 1,
 * MPI_COMM_WORLD) and poll until it has completed. After every poll, as long
 * as the global doneWithRecvs is not set, tryRecv() is called with the
 * caller's receive request and buffer.
 */
void dosend(int dest, rndtype val, rndtype finish, MPI_Request* recvReq, rndtype recvBuf[])
{
  rndtype     payload[2];
  MPI_Request pending;
  MPI_Status  outcome;
  int         sent;

  payload[0] = val;
  payload[1] = finish;

  MPI_Issend(payload, 2*sizeof(rndtype), MPI_BYTE, dest, 1, MPI_COMM_WORLD, &pending);

  for (;;) {
    MPI_Test(&pending, &sent, &outcome);
    if (!doneWithRecvs) {
      tryRecv(recvReq, recvBuf);
    }
    if (sent) {
      break;
    }
  }
}
/// Begin an exchange of the shared components of `in`.
///
/// The input pointer is always remembered in `data`. In ESYS_MPI builds with
/// more than one process the function then
///   1. throws PasoException if a collect is already in progress,
///   2. posts one non-blocking receive per receive-neighbour,
///   3. copies the shared send components of `in` into `send_buffer`,
///   4. starts one synchronous-mode non-blocking send per send-neighbour,
///   5. advances the message counter by the number of processes and marks the
///      coupler as in use.
/// Receive requests occupy the first slots of `mpi_requests`; send requests
/// follow them.
void Coupler::startCollect(const double* in)
{
    data = const_cast<double*>(in);
#ifdef ESYS_MPI
    // Nothing to exchange on a single process.
    if (mpi_info->size <= 1)
        return;

    if (in_use)
        throw PasoException("Coupler::startCollect: Coupler in use.");

    // Post the receives first; tags are offset by the sending rank.
    for (dim_t r = 0; r < connector->recv->neighbour.size(); ++r) {
        MPI_Irecv(&recv_buffer[connector->recv->offsetInShared[r]*block_size],
                  (connector->recv->offsetInShared[r+1]-connector->recv->offsetInShared[r])*block_size,
                  MPI_DOUBLE,
                  connector->recv->neighbour[r],
                  mpi_info->counter()+connector->recv->neighbour[r],
                  mpi_info->comm,
                  &mpi_requests[r]);
    }

    // Gather the outgoing values into the send buffer.
    const int numSharedSend = connector->send->numSharedComponents;
    if (block_size <= 1) {
#pragma omp parallel for
        for (dim_t k = 0; k < numSharedSend; ++k) {
            send_buffer[k] = in[connector->send->shared[k]];
        }
    } else {
        const size_t block_size_size = block_size*sizeof(double);
#pragma omp parallel for
        for (dim_t k = 0; k < numSharedSend; ++k) {
            memcpy(&(send_buffer[(block_size)*k]),
                   &(in[block_size*connector->send->shared[k]]),
                   block_size_size);
        }
    }

    // Ship the buffer; tags are offset by our own rank.
    for (dim_t s = 0; s < connector->send->neighbour.size(); ++s) {
        MPI_Issend(&send_buffer[connector->send->offsetInShared[s]*block_size],
                   (connector->send->offsetInShared[s+1] - connector->send->offsetInShared[s])*block_size,
                   MPI_DOUBLE,
                   connector->send->neighbour[s],
                   mpi_info->counter()+mpi_info->rank,
                   mpi_info->comm,
                   &mpi_requests[s+connector->recv->neighbour.size()]);
    }

    mpi_info->incCounter(mpi_info->size);
    in_use = true;
#endif
}
/*
 * Pack and send the shadow particles destined for each neighbour processor.
 *
 * Two arrays with one entry per neighbour are allocated on `self`: the
 * request handles and the outgoing particle buffers. For every neighbour with
 * a non-zero leaving count, a request and a zero-initialised byte buffer of
 * (particle size * count) bytes are allocated, the particles of all cells
 * shadowed to that neighbour are copied into the buffer, and the buffer is
 * sent as bytes with a synchronous-mode non-blocking send tagged
 * SHADOW_PARTICLES. Entries of neighbours with a zero count are left unset.
 */
void _ParticleShadowSync_SendShadowParticles( ParticleCommHandler *self )
{
	ShadowInfo*     shadowInfo = CellLayout_GetShadowInfo( self->swarm->cellLayout );
	ProcNbrInfo*    nbrInfo = shadowInfo->procNbrInfo;
	Processor_Index destProc;
	int             nbr_I = 0, shadowed_I = 0, cellParticle_I = 0, cell = 0;
	unsigned int    slot = 0;
	long            byteCount = 0;
	unsigned int    particle_I = 0;

	self->shadowParticlesLeavingMeHandles = Memory_Alloc_Array_Unnamed( MPI_Request*, nbrInfo->procNbrCnt );
	self->shadowParticlesLeavingMe = Memory_Alloc_Array_Unnamed( Particle*, nbrInfo->procNbrCnt );

	for( nbr_I = 0; nbr_I < nbrInfo->procNbrCnt; nbr_I++ ) {
		destProc = nbrInfo->procNbrTbl[nbr_I];

		/* Nothing leaves for this neighbour */
		if( self->shadowParticlesLeavingMeTotalCounts[nbr_I] == 0 ) {
			continue;
		}

		self->shadowParticlesLeavingMeHandles[nbr_I] = Memory_Alloc_Array_Unnamed( MPI_Request, 1 );

		byteCount = self->swarm->particleExtensionMgr->finalSize * self->shadowParticlesLeavingMeTotalCounts[nbr_I];
		self->shadowParticlesLeavingMe[nbr_I] = Memory_Alloc_Bytes( byteCount, "Particle", "pCommHandler->outgoingPArray" );
		memset( self->shadowParticlesLeavingMe[nbr_I], 0, byteCount );

		/* Copy every particle of every cell shadowed to this neighbour */
		slot = 0;
		for( shadowed_I = 0; shadowed_I < shadowInfo->procShadowedCnt[nbr_I]; shadowed_I++ ) {
			cell = shadowInfo->procShadowedTbl[nbr_I][shadowed_I];
			for( cellParticle_I = 0; cellParticle_I < self->swarm->cellParticleCountTbl[cell]; cellParticle_I++ ) {
				particle_I = self->swarm->cellParticleTbl[cell][cellParticle_I];
				Swarm_CopyParticleOffSwarm( self->swarm,
						self->shadowParticlesLeavingMe[nbr_I],
						slot++,
						particle_I );
			}
		}

		MPI_Issend( self->shadowParticlesLeavingMe[nbr_I],
				self->shadowParticlesLeavingMeTotalCounts[nbr_I] * self->swarm->particleExtensionMgr->finalSize,
				MPI_BYTE,
				destProc,
				SHADOW_PARTICLES,
				self->swarm->comm,
				self->shadowParticlesLeavingMeHandles[nbr_I] );
	}
}
/*
 * Synchronous-mode non-blocking send that goes either through MPI or through
 * the ATN link.
 *
 * Unless tormpi_neigh_comm_only is set, `dest` is first translated through
 * tormpi_lut. A non-negative destination is sent with MPI_Issend
 * (request->flag = 0) and the MPI return code is returned. A negative
 * destination selects an ATN link (request->flag = 2): the data is packed in
 * "external32" format into an aligned temporary buffer and pushed out with
 * ATNSEND.
 *
 * Returns MPI_SUCCESS after an ATN send, -700 if the aligned buffer cannot be
 * allocated; a packet larger than tormpi_maxbuff terminates the program with
 * exit(-751).
 */
int tormpi_Issend(void* buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, tormpi_Request *request)
{
  MPI_Aint     position=0,size;
  int          esize;
  void         *pbuf, *atn_buf;
  unsigned int linkId,chId,sz;

  /* Use the lut only for non optimised nearest-neighbors-only communications */
  if (tormpi_neigh_comm_only==0) {
    dest=tormpi_lut[dest];
  }

  /* A non-negative rank means the destination is not reachable via atn */
  if (dest >= 0) {
    request->flag=0;
    return MPI_Issend(buf, count, datatype, dest, tag, comm, &(request->mpir));
  }

  /* ATN path */
  request->flag=2;
  chId=tormpi_vc[-dest];
  linkId=-dest-1;

  MPI_Pack_external_size("external32",count,datatype,&size);
  sz=(unsigned int)ceil(size/TORMPI_MINPACKF);
  esize=sz*TORMPI_MINPACK;

  if (posix_memalign(&atn_buf,(size_t)TORMPI_BUFALIGN,(size_t)esize) != 0) {
    return(-700);
  }

  MPI_Pack_external("external32",buf,count,datatype,atn_buf,size,&position);
  pbuf=atn_buf;

  if (sz > tormpi_maxbuff) {
    fprintf(stderr,"tormpi_isend ERROR: packet too large (%d>%d). Please use _isendrecv.\n",sz,tormpi_maxbuff);
    exit(-751);
  }

#ifdef WITHPROXYBIP
  atnCredit(linkId,chId,0,1,0);
  atnPoll(linkId,chId,0,1,&tormpi_bip,0);
#endif
  ATNSEND(linkId,chId,pbuf,0,sz);

  free(atn_buf);
  return(MPI_SUCCESS);
}
void Computer::fold(std::vector<double> *folded, int target_rank) { int my_rank, process_count; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &process_count); MPI_Request send_request; MPI_Status tmp_status; MPI_Barrier(MPI_COMM_WORLD); MPI_Issend(folded->data(), folded->size(), MPI_DOUBLE, target_rank, 0, MPI_COMM_WORLD, &send_request); if (my_rank != 0) { MPI_Wait(&send_request, &tmp_status); return; } auto received = new std::vector<double>; auto recv_array = new double[runs]; received->reserve(runs * process_count); for (int i = 0; i < process_count; ++i) { int received_count; MPI_Status recv_status; MPI_Recv(recv_array, runs, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &recv_status); MPI_Get_count(&recv_status, MPI_DOUBLE, &received_count); for (int j = 0; j < received_count; ++j) received->push_back(recv_array[j]); } MPI_Wait(&send_request, &tmp_status); folded->clear(); folded->reserve(received->size()); for (int i = 0; (unsigned)i < received->size(); ++i) folded->push_back(received->at(i)); delete[] recv_array; delete received; }
/*
 * MPI_Issend for a Fortran 2008 C-descriptor buffer.
 *
 *   x0 - C descriptor of the send buffer
 *   x1 - element count
 *   x2 - MPI datatype
 *   x3 - destination rank
 *   x4 - tag
 *   x5 - communicator
 *   x6 - receives the request handle
 *
 * The F08 MPI_BOTTOM sentinel is mapped to MPI_BOTTOM. For a non-contiguous
 * array (rank != 0 and not CFI_is_contiguous) a datatype describing the whole
 * section is created and a single element of it is sent; that datatype is
 * freed after the send has been started.
 *
 * Returns the error code of MPI_Issend, or the error code of
 * cdesc_create_datatype if the section datatype could not be created. The
 * previous version discarded that code and sent anyway, with count 1 and a
 * datatype that need not describe the section.
 * NOTE(review): assumes cdesc_create_datatype reports success as MPI_SUCCESS
 * - confirm.
 */
int MPIR_Issend_cdesc(CFI_cdesc_t* x0, int x1, MPI_Datatype x2, int x3, int x4, MPI_Comm x5, MPI_Request * x6)
{
    int err = MPI_SUCCESS;
    void *buf0 = x0->base_addr;
    int count0 = x1;
    MPI_Datatype dtype0 = x2;

    if (buf0 == &MPIR_F08_MPI_BOTTOM) {
        buf0 = MPI_BOTTOM;
    }

    if (x0->rank != 0 && !CFI_is_contiguous(x0)) {
        err = cdesc_create_datatype(x0, x1, x2, &dtype0);
        if (err != MPI_SUCCESS) {
            /* Do not start a send with a datatype that was not built. */
            return err;
        }
        count0 = 1;
    }

    err = MPI_Issend(buf0, count0, dtype0, x3, x4, x5, x6);

    /* Only a datatype created above is ours to free. */
    if (dtype0 != x2) MPI_Type_free(&dtype0);
    return err;
}
int main (int argc, char *argv[]) { int rank, size; MPI_Request req; MPI_Status status; #ifdef initial_code initial_code #endif MPII_Stack_size = 10000; MPI_Init (&argc, &argv); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Comm_size (MPI_COMM_WORLD, &size); printf ("Hello world from process %d/%d\n", rank+1, size); if (rank == 0) { int i; MPI_Request *req = (MPI_Request *) malloc (sizeof (MPI_Request) * (size-1)); MPI_Status *statuses = (MPI_Status *) malloc (sizeof (MPI_Status) * (size-1)); for (i = 1; i < size; i++) MPI_Issend (&rank, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &(req[i-1])); MPI_Waitall (size-1, req, statuses); } else { MPI_Recv (&rank, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); printf ("Process %d (source %d, tag %d) also says hello\n", rank+1, status.MPI_SOURCE, status.MPI_TAG); } MPI_Finalize (); return 0; }
int main( int argc, char **argv ) { int size, rank, flag, i; int *buf1, *buf2, cnt; double t0; MPI_Status statuses[2]; MPI_Request req[2]; MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &size ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); if (size < 2) { printf( "This test requires at least 2 processors\n" ); MPI_Abort( MPI_COMM_WORLD, 1 ); return 1; } /* Large enough that almost certainly a rendezvous algorithm will be used by Issend. buflimit.c will give you a more reliable value */ cnt = 35000; /* Test: process 0 process 1 Irecv1 Irecv2 Sendrecv Sendrecv pause(2 sec) pause(2 sec) Issend2 Waitall test(2) for 5 secs Ssend1 Wait(2) if necessary If the test for Issend2 never succeeds, then the waitall appears to be waiting for req1 first. By using Issend, we can keep the program from hanging. */ buf1 = (int *)malloc( cnt * sizeof(int) ); buf2 = (int *)malloc( cnt * sizeof(int) ); if (!buf1 || !buf2) { printf( "Could not allocate buffers of size %d\n", cnt ); MPI_Abort( MPI_COMM_WORLD, 1 ); return 1; } for (i=0; i<cnt; i++) { buf1[i] = i; buf2[i] = i; } if (rank == 0) { MPI_Sendrecv( MPI_BOTTOM, 0, MPI_BYTE, size - 1, 3, MPI_BOTTOM, 0, MPI_BYTE, size - 1, 3, MPI_COMM_WORLD, &statuses[0] ); Pause( 2.0 ); MPI_Issend( buf2, cnt, MPI_INT, size-1, 2, MPI_COMM_WORLD, &req[0] ); t0 = MPI_Wtime(); flag = 0; while (t0 + 5.0 > MPI_Wtime() && !flag) MPI_Test( &req[0], &flag, &statuses[0] ); MPI_Ssend( buf1, cnt, MPI_INT, size-1, 1, MPI_COMM_WORLD ); if (!flag) { printf( "*ERROR: MPI_Waitall appears to be waiting for requests in the order\n\ they appear in the request list\n" ); MPI_Wait( &req[0], &statuses[0] ); }
int main(int argc, char *argv[]) { int provided, wrank, wsize, nmsg, i, tag; int *(buf[MAX_TARGETS]), bufsize[MAX_TARGETS]; MPI_Request r[MAX_TARGETS]; MPI_Comm commDup, commEven; MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided); MPI_Comm_rank(MPI_COMM_WORLD, &wrank); MPI_Comm_size(MPI_COMM_WORLD, &wsize); if (wsize < 4) { fprintf(stderr, "This test requires at least 4 processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } /* Create several communicators */ MPI_Comm_dup(MPI_COMM_WORLD, &commDup); MPI_Comm_set_name(commDup, "User dup of comm world"); MPI_Comm_split(MPI_COMM_WORLD, wrank & 0x1, wrank, &commEven); if (wrank & 0x1) MPI_Comm_free(&commEven); else MPI_Comm_set_name(commEven, "User split to even ranks"); /* Create a collection of pending sends and receives * We use tags on the sends and receives (when ANY_TAG isn't used) * to provide an easy way to check that the proper requests are present. * TAG values use fields, in decimal (for easy reading): * 0-99: send/recv type: * 0 - other * 1 - irecv * 2 - isend * 3 - issend * 4 - ibsend * 5 - irsend * 6 - persistent recv * 7 - persistent send * 8 - persistent ssend * 9 - persistent rsend * 10 - persistent bsend * 100-999: destination (for send) or source, if receive. 
999 = any-source * (rank is value/100) * 1000-2G: other values */ /* Create the send/receive buffers */ nmsg = 10; for (i = 0; i < nmsg; i++) { bufsize[i] = i; if (i) { buf[i] = (int *) calloc(bufsize[i], sizeof(int)); if (!buf[i]) { fprintf(stderr, "Unable to allocate %d words\n", bufsize[i]); MPI_Abort(MPI_COMM_WORLD, 2); } } else buf[i] = 0; } /* Partial implementation */ if (wrank == 0) { nmsg = 0; tag = 2 + 1 * 100; MPI_Isend(buf[0], bufsize[0], MPI_INT, 1, tag, MPI_COMM_WORLD, &r[nmsg++]); tag = 3 + 2 * 100; MPI_Issend(buf[1], bufsize[1], MPI_INT, 2, tag, MPI_COMM_WORLD, &r[nmsg++]); tag = 1 + 3 * 100; MPI_Irecv(buf[2], bufsize[2], MPI_INT, 3, tag, MPI_COMM_WORLD, &r[nmsg++]); } else if (wrank == 1) { } else if (wrank == 2) { } else if (wrank == 3) { } /* provide a convenient place to wait */ MPI_Barrier(MPI_COMM_WORLD); printf("Barrier 1 finished\n"); /* Match up (or cancel) the requests */ if (wrank == 0) { MPI_Waitall(nmsg, r, MPI_STATUSES_IGNORE); } else if (wrank == 1) { tag = 2 + 1 * 100; MPI_Recv(buf[0], bufsize[0], MPI_INT, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if (wrank == 2) { tag = 3 + 2 * 100; MPI_Recv(buf[1], bufsize[1], MPI_INT, 0, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if (wrank == 3) { tag = 1 + 3 * 100; MPI_Send(buf[2], bufsize[2], MPI_INT, 0, tag, MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); printf("Barrier 2 finished\n"); MPI_Comm_free(&commDup); if (commEven != MPI_COMM_NULL) MPI_Comm_free(&commEven); MPI_Finalize(); return 0; }
int main() { MPI_Init(NULL, NULL); /* MPI variables (in some sense) */ MPI_Comm comm; MPI_Status status; MPI_Request request; int size, rank, tag; int comm2d, disp, left, right, up, down, reorder; int dims[NDIMS], period[NDIMS], direction[NDIMS]; /* variable for the program */ int nx, ny, nxp, nyp, nxpe, nype; int i, j, iter; int lastcheck, checkinc; double max, delta; double avg, mean; char picName[20] = "edgeCHANGETHIS.pgm"; /* * find the size of the image do the arrays can be defined */ pgmsize(picName, &nx, &ny); comm = MPI_COMM_WORLD; MPI_Comm_size(comm, &size); tag = 1; /* Introduce Cartesian topology */ for(i=0; i<NDIMS; ++i) { dims[i] = 0; period[i] = FALSE; /* TRUE gives Cyclic */ direction[i] = i; /* shift along the same index as element of the array*/ } reorder = TRUE; /* allows the processes to become reordered to hopefully improve efficiency */ disp = 1; /* Shift by 1 */ MPI_Dims_create(size,NDIMS,dims); MPI_Cart_create(comm,NDIMS,dims,period,reorder,&comm2d); MPI_Comm_rank(comm2d,&rank); MPI_Cart_shift(comm2d,direction[1],disp,&left,&right); MPI_Cart_shift(comm2d,direction[0],disp,&up,&down); /* check the array is a reasonable size to be split up among the processors to be used and if not quit */ if(nx < dims[1] || ny < dims[0]) { if(ROOT == rank) { printf("too many processors running on job, %d in x direction but only %d elements, %d in y, %d elements\n", dims[1], nx, dims[0], ny); } return 1; } initialise_local_array_sizes(nx, ny, &nxp, &nyp, &nxpe, &nype, dims, rank, size); /* now declare the arrays necessary (note they can be different sizes on different processes*/ float localBuf[nxp][nyp]; float localEdge[nxp+2][nyp+2], localOld[nxp+2][nyp+2], localNew[nxp+2][nyp+2]; float globalImage[nx][ny]; /* * set the halos of all the appropriate arrays to 255 */ set_halos(localEdge,localOld, localNew, nxp, nyp); if(ROOT == rank) { printf("Reading in Picture\n"); pgmread(picName, globalImage, nx, ny); } /*set up all the datatypes that will need to be used*/ 
/*send contiguous halos*/ MPI_Datatype mcols; MPI_Type_contiguous(nyp, MPI_FLOAT, &mcols); MPI_Type_commit(&mcols); /*send non-conmtiguous halos*/ MPI_Datatype mrows; MPI_Type_vector(nxp, 1, nyp+2, MPI_FLOAT, &mrows); /*nyp+2 since will be used on nyp+2 size arrays*/ MPI_Type_commit(&mrows); /*scatter data to processes with same size arrays as ROOT*/ MPI_Datatype scatter[4]; MPI_Type_vector(nxp, nyp, ny, MPI_FLOAT, &scatter[3]); MPI_Type_commit(&scatter[3]); /*scatter data to processes with different size arrays than ROOT in dim[0]*/ MPI_Type_vector(nxp, nype, ny, MPI_FLOAT, &scatter[0]); MPI_Type_commit(&scatter[0]); /*scatter data to processes with different size arrays than ROOT in dim[1]*/ MPI_Type_vector(nxpe, nyp, ny, MPI_FLOAT, &scatter[1]); MPI_Type_commit(&scatter[1]); /*scatter data to processes with different size arrays than ROOT in dim[0] and dim[1]*/ MPI_Type_vector(nxpe, nype, ny, MPI_FLOAT, &scatter[4]); MPI_Type_commit(&scatter[4]); /* Scatter the data from processer 0 to the rest */ if(ROOT == rank) { printf("Scattering image\n"); scatter_data(globalImage, localBuf, ny, nxp, nyp, dims, rank, comm2d, scatter); } else { MPI_Recv(localBuf, nxp*nyp, MPI_FLOAT, 0, rank, comm2d, &status); } /* * set up the edge data to be used in computation */ for(i=0; i<nxp; ++i) { for(j=0; j<nyp; ++j) { localEdge[i+1][j+1] = localBuf[i][j]; localOld[i+1][j+1] = 255; } } /* * computation loop */ if(ROOT == rank) { printf("Performing update routine for %d iterations\n", ITERATIONS); } double t1, t2; t1 = MPI_Wtime(); tag = 2; lastcheck = checkinc = iter = 0; delta = 1; while(iter < ITERATIONS) { send_halos(localOld, left, right, up, down, comm2d, tag, nxp, nyp, mrows, mcols); avg = 0; for(i=1; i<nxp+1; ++i) { for(j=1; j<nyp+1; ++j) { localNew[i][j] = 0.25*(localOld[i-1][j] + localOld[i+1][j] + localOld[i][j-1] + localOld[i][j+1] - localEdge[i][j]); avg = avg + localNew[i][j]; } } max = 0; for(i=1; i<nxp+1; ++i) { for(j=1; j<nyp+1; ++j) { if(fabs(localNew[i][j] - 
localOld[i][j]) > max) { max = fabs(localNew[i][j] - localOld[i][j]); } localOld[i][j] = localNew[i][j]; } } /* * want to perform a calculation of the average pixel value and delta */ if(iter == lastcheck + checkinc) { lastcheck = iter; MPI_Reduce(&avg, &mean, 1, MPI_DOUBLE, MPI_SUM, ROOT, comm2d); MPI_Allreduce(&max, &delta, 1, MPI_DOUBLE, MPI_MAX, comm2d); if(ROOT == rank) { // printf("iteration %d, average pixel value is %f, current delta %f\n", iter, mean/(nx*ny), delta); } checkinc = (int)(delta*500); if(checkinc > 200) checkinc = 500; } ++iter; if(ITERATIONS == iter) { break; } } t2 = MPI_Wtime(); if(ROOT == rank) { printf("finished after %d iterations, delta was %f\n", iter-1, delta); printf("seconds per iteration: %f\n", (t2-t1)/(iter-1)); } for(i=0; i<nxp; ++i) { for(j=0; j<nyp; ++j) { localBuf[i][j] = localOld[i+1][j+1]; } } tag = 3; if(ROOT == rank) { printf("recieving back data\n"); receive_data(globalImage, localBuf, ny, nxp, nyp, dims, tag, rank, comm2d, scatter); } else { MPI_Issend(localBuf, nxp*nyp, MPI_FLOAT, ROOT, tag, comm2d, &request); MPI_Wait(&request, &status); } if(ROOT == rank) { pgmwrite("parpictureCHANGETHIS.pgm", globalImage, nx, ny); } MPI_Finalize(); return 0; }
int main( int argc, char *argv[] ) { int errs = 0; int rank, size, /* source, */ dest; MPI_Comm comm; MPI_Status status; MPI_Request req; static int bufsizes[4] = { 1, 100, 10000, 1000000 }; char *buf; #ifdef TEST_IRSEND int veryPicky = 0; /* Set to 1 to test "quality of implementation" in a tricky part of cancel */ #endif int cs, flag, n; MTest_Init( &argc, &argv ); comm = MPI_COMM_WORLD; MPI_Comm_rank( comm, &rank ); MPI_Comm_size( comm, &size ); /* source = 0; */ dest = size - 1; MTestPrintfMsg( 1, "Starting scancel test\n" ); for (cs=0; cs<4; cs++) { if (rank == 0) { n = bufsizes[cs]; buf = (char *)malloc( n ); if (!buf) { fprintf( stderr, "Unable to allocate %d bytes\n", n ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MTestPrintfMsg( 1, "(%d) About to create isend and cancel\n",cs ); MPI_Isend( buf, n, MPI_CHAR, dest, cs+n+1, comm, &req ); MPI_Cancel( &req ); MPI_Wait( &req, &status ); MTestPrintfMsg( 1, "Completed wait on isend\n" ); MPI_Test_cancelled( &status, &flag ); if (!flag) { errs ++; printf( "Failed to cancel an Isend request\n" ); fflush(stdout); } else { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); /* Send the tag so the message can be received */ n = cs+n+1; MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); free( buf ); } else if (rank == dest) { int nn, tag; char *btemp; MPI_Recv( &nn, 1, MPI_INT, 0, 123, comm, &status ); MPI_Recv( &tag, 1, MPI_INT, 0, 123, comm, &status ); if (nn > 0) { /* If the message was not cancelled, receive it here */ btemp = (char*)malloc( nn ); if (!btemp) { fprintf( stderr, "Unable to allocate %d bytes\n", nn ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MPI_Recv( btemp, nn, MPI_CHAR, 0, tag, comm, &status ); free(btemp); } } MPI_Barrier( comm ); if (rank == 0) { char *bsendbuf; int bsendbufsize; int bf, bs; n = bufsizes[cs]; buf = (char *)malloc( n ); if (!buf) { fprintf( stderr, "Unable to allocate %d bytes\n", n ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } 
bsendbufsize = n + MPI_BSEND_OVERHEAD; bsendbuf = (char *)malloc( bsendbufsize ); if (!bsendbuf) { fprintf( stderr, "Unable to allocate %d bytes for bsend\n", n ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MPI_Buffer_attach( bsendbuf, bsendbufsize ); MTestPrintfMsg( 1, "About to create and cancel ibsend\n" ); MPI_Ibsend( buf, n, MPI_CHAR, dest, cs+n+2, comm, &req ); MPI_Cancel( &req ); MPI_Wait( &req, &status ); MPI_Test_cancelled( &status, &flag ); if (!flag) { errs ++; printf( "Failed to cancel an Ibsend request\n" ); fflush(stdout); } else { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); /* Send the tag so the message can be received */ n = cs+n+2; MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); free( buf ); MPI_Buffer_detach( &bf, &bs ); free( bsendbuf ); } else if (rank == dest) { int nn, tag; char *btemp; MPI_Recv( &nn, 1, MPI_INT, 0, 123, comm, &status ); MPI_Recv( &tag, 1, MPI_INT, 0, 123, comm, &status ); if (nn > 0) { /* If the message was not cancelled, receive it here */ btemp = (char*)malloc( nn ); if (!btemp) { fprintf( stderr, "Unable to allocate %d bytes\n", nn); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MPI_Recv( btemp, nn, MPI_CHAR, 0, tag, comm, &status ); free(btemp); } } MPI_Barrier( comm ); /* Because this test is erroneous, we do not perform it unless TEST_IRSEND is defined. 
*/ #ifdef TEST_IRSEND /* We avoid ready send to self because an implementation is free to detect the error in delivering a message to itself without a pending receive; we could also check for an error return from the MPI_Irsend */ if (rank == 0 && dest != rank) { n = bufsizes[cs]; buf = (char *)malloc( n ); if (!buf) { fprintf( stderr, "Unable to allocate %d bytes\n", n ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MTestPrintfMsg( 1, "About to create and cancel irsend\n" ); MPI_Irsend( buf, n, MPI_CHAR, dest, cs+n+3, comm, &req ); MPI_Cancel( &req ); MPI_Wait( &req, &status ); MPI_Test_cancelled( &status, &flag ); /* This can be pretty ugly. The standard is clear (Section 3.8) that either a sent message is received or the sent message is successfully cancelled. Since this message can never be received, the cancel must complete successfully. However, since there is no matching receive, this program is erroneous. In this case, we can't really flag this as an error */ if (!flag && veryPicky) { errs ++; printf( "Failed to cancel an Irsend request\n" ); fflush(stdout); } if (flag) { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); /* Send the tag so the message can be received */ n = cs+n+3; MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); free( buf ); } else if (rank == dest) { int n, tag; char *btemp; MPI_Recv( &n, 1, MPI_INT, 0, 123, comm, &status ); MPI_Recv( &tag, 1, MPI_INT, 0, 123, comm, &status ); if (n > 0) { /* If the message was not cancelled, receive it here */ btemp = (char*)malloc( n ); if (!btemp) { fprintf( stderr, "Unable to allocate %d bytes\n", n); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MPI_Recv( btemp, n, MPI_CHAR, 0, tag, comm, &status ); free(btemp); } } MPI_Barrier( comm ); #endif if (rank == 0) { n = bufsizes[cs]; buf = (char *)malloc( n ); if (!buf) { fprintf( stderr, "Unable to allocate %d bytes\n", n ); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MTestPrintfMsg( 1, "About to create and 
cancel issend\n" ); MPI_Issend( buf, n, MPI_CHAR, dest, cs+n+4, comm, &req ); MPI_Cancel( &req ); MPI_Wait( &req, &status ); MPI_Test_cancelled( &status, &flag ); if (!flag) { errs ++; printf( "Failed to cancel an Issend request\n" ); fflush(stdout); } else { n = 0; } /* Send the size, zero for successfully cancelled */ MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); /* Send the tag so the message can be received */ n = cs+n+4; MPI_Send( &n, 1, MPI_INT, dest, 123, comm ); free( buf ); } else if (rank == dest) { int nn, tag; char *btemp; MPI_Recv( &nn, 1, MPI_INT, 0, 123, comm, &status ); MPI_Recv( &tag, 1, MPI_INT, 0, 123, comm, &status ); if (nn > 0) { /* If the message was not cancelled, receive it here */ btemp = (char*)malloc( nn ); if (!btemp) { fprintf( stderr, "Unable to allocate %d bytes\n", nn); MPI_Abort( MPI_COMM_WORLD, 1 ); exit(1); } MPI_Recv( btemp, nn, MPI_CHAR, 0, tag, comm, &status ); free(btemp); } } MPI_Barrier( comm ); } MTest_Finalize( errs ); MPI_Finalize(); return 0; }
void SystemMatrix::copyColCoupleBlock() { if (mpi_info->size == 1) { // nothing to do return; } else if (!row_coupleBlock) { throw PasoException("SystemMatrix::copyColCoupleBlock: " "creation of row_coupleBlock pattern not supported yet."); } else if (row_coupler->in_use) { throw PasoException("SystemMatrix::copyColCoupleBlock: Coupler in use."); } const dim_t numNeighboursSend = row_coupler->connector->send->neighbour.size(); const dim_t numNeighboursRecv = row_coupler->connector->recv->neighbour.size(); // start receiving for (dim_t p = 0; p < numNeighboursRecv; p++) { #ifdef ESYS_MPI const index_t irow1 = row_coupler->connector->recv->offsetInShared[p]; const index_t irow2 = row_coupler->connector->recv->offsetInShared[p+1]; const index_t a = row_coupleBlock->pattern->ptr[irow1]; const index_t b = row_coupleBlock->pattern->ptr[irow2]; MPI_Irecv(&row_coupleBlock->val[a*block_size], (b-a) * block_size, MPI_DOUBLE, row_coupler->connector->recv->neighbour[p], mpi_info->counter()+row_coupler->connector->recv->neighbour[p], mpi_info->comm, &row_coupler->mpi_requests[p]); #endif } // start sending index_t z0 = 0; double* send_buffer = new double[col_coupleBlock->len]; const size_t block_size_size = block_size*sizeof(double); for (dim_t p = 0; p < numNeighboursSend; p++) { // j_min, j_max defines the range of columns to be sent to processor p const index_t j_min = col_coupler->connector->recv->offsetInShared[p]; const index_t j_max = col_coupler->connector->recv->offsetInShared[p+1]; index_t z = z0; // run over the rows to be connected to processor p for (index_t rPtr=row_coupler->connector->send->offsetInShared[p]; rPtr < row_coupler->connector->send->offsetInShared[p+1]; ++rPtr) { const index_t row = row_coupler->connector->send->shared[rPtr]; // collect the entries in the col couple block referring to // columns on processor p for (index_t iPtr=col_coupleBlock->pattern->ptr[row]; iPtr < col_coupleBlock->pattern->ptr[row+1]; ++iPtr) { const index_t j = 
col_coupleBlock->pattern->index[iPtr]; if (j_min <= j && j < j_max) { memcpy(&send_buffer[z], &col_coupleBlock->val[block_size*iPtr], block_size_size); z+=block_size; } } } #ifdef ESYS_MPI MPI_Issend(&send_buffer[z0], z-z0, MPI_DOUBLE, row_coupler->connector->send->neighbour[p], mpi_info->counter()+mpi_info->rank, mpi_info->comm, &row_coupler->mpi_requests[p+numNeighboursRecv]); #endif z0 = z; } // wait until everything is done #ifdef ESYS_MPI mpi_info->incCounter(mpi_info->size); MPI_Waitall(numNeighboursSend+numNeighboursRecv, row_coupler->mpi_requests, row_coupler->mpi_stati); #endif delete[] send_buffer; }
// Collects the distributed system matrix on rank 0 as one sparse matrix.
//
// With a single rank the result is the sub-matrix of mainBlock selecting all
// n rows and columns. With several ranks (ESYS_MPI builds) every rank merges
// its main and couple blocks via mergeMainAndCouple(); ranks other than 0
// send their row pointers, column indices and values to rank 0 in three
// messages, and rank 0 assembles them behind its own data.
//
// Returns the merged matrix on rank 0 (or on the only rank); every other
// rank, and a multi-rank run built without ESYS_MPI, gets an empty
// SparseMatrix_ptr.
SparseMatrix_ptr SystemMatrix::mergeSystemMatrix() const
{
    const index_t n = mainBlock->numRows;

    if (mpi_info->size == 1) {
        // identity selection: keep row/column k as row/column k
        index_t* identity = new index_t[n];
#pragma omp parallel for
        for (index_t k=0; k<n; k++)
            identity[k] = k;
        SparseMatrix_ptr merged(mainBlock->getSubmatrix(n, n, identity, identity));
        delete[] identity;
        return merged;
    }

#ifdef ESYS_MPI
    const index_t numRanks = mpi_info->size;
    const index_t myRank = mpi_info->rank;

    // Merge main block and couple block to get the complete column entries
    // for each row allocated to current rank. Output (ptr, idx, val)
    // contains all info needed from current rank to merge a system matrix
    index_t* ptr;
    index_t* idx;
    double* val;
    mergeMainAndCouple(&ptr, &idx, &val);

    std::vector<MPI_Request> requests(numRanks*2);
    std::vector<MPI_Status> statuses(numRanks*2);

    // NOTE(review): index_t data is transferred as MPI_INT throughout -
    // this assumes index_t is int; confirm for builds with a wider index.
    if (myRank != 0) {
        // Sender: three messages whose tags are numRanks apart, matching
        // the three counter increments performed by rank 0.
        index_t tag = mpi_info->counter()+myRank;

        // row pointers without the leading entry
        MPI_Issend(&ptr[1], n, MPI_INT, 0, tag, mpi_info->comm, &requests[0]);

        // column indices; ptr[n] is the number of local entries
        const index_t numEntries = ptr[n];
        tag += numRanks;
        MPI_Issend(idx, numEntries, MPI_INT, 0, tag, mpi_info->comm,
                   &requests[1]);

        // values, block_size doubles per entry
        const index_t numValues = numEntries * block_size;
        tag += numRanks;
        MPI_Issend(val, numValues, MPI_DOUBLE, 0, tag, mpi_info->comm,
                   &requests[2]);

        MPI_Waitall(3, &requests[0], &statuses[0]);
        // leaves the counter 3*numRanks above its value on entry
        mpi_info->setCounter(tag + numRanks - myRank);

        delete[] ptr;
        delete[] idx;
        delete[] val;
    } else {
        // ---- stage 1: gather the row pointers ----
        const index_t global_n = getGlobalNumRows();
        index_t* ptr_global = new index_t[global_n+1];
        memcpy(ptr_global, ptr, (n+1)*sizeof(index_t));
        delete[] ptr;

        // rowsOf[r]: number of ptr entries contributed by rank r (n+1 for
        // rank 0 itself); entriesOf[r]: number of matrix entries of rank r
        index_t* rowsOf = new index_t[numRanks];
        index_t* entriesOf = new index_t[numRanks];

        index_t ptrCursor = n+1;
        rowsOf[0] = ptrCursor;
        for (index_t r=1; r<numRanks; r++) {
            const index_t remoteRows = row_distribution->first_component[r+1]
                                     - row_distribution->first_component[r];
            MPI_Irecv(&ptr_global[ptrCursor], remoteRows, MPI_INT, r,
                      mpi_info->counter()+r, mpi_info->comm, &requests[r]);
            rowsOf[r] = remoteRows;
            ptrCursor += remoteRows;
        }
        mpi_info->incCounter(numRanks);
        MPI_Waitall(numRanks-1, &requests[1], &statuses[0]);

        // The last pointer received from a rank is its entry count (the
        // received pointers are still relative to that rank's own data).
        index_t totalEntries = 0;
        index_t lastPos = -1;
        for (index_t r=0; r<numRanks; r++) {
            if (rowsOf[r] > 0) {
                lastPos += rowsOf[r];
                totalEntries += ptr_global[lastPos];
                entriesOf[r] = ptr_global[lastPos];
            } else {
                entriesOf[r] = 0;
            }
        }

        // ---- stage 2: gather the column indices ----
        index_t* idx_global = new index_t[totalEntries];
        index_t entryCursor = entriesOf[0];
        index_t rowCursor = n+1;
        for (index_t r=1; r<numRanks; r++) {
            const index_t count = entriesOf[r];
            MPI_Irecv(&idx_global[entryCursor], count, MPI_INT, r,
                      mpi_info->counter()+r, mpi_info->comm, &requests[r]);

            // shift rank r's pointers by the entries stored ahead of it
            const index_t remoteRows = rowsOf[r];
            for (index_t k=0; k<remoteRows; k++)
                ptr_global[rowCursor+k] += entryCursor;

            rowCursor += remoteRows;
            entryCursor += count;
        }
        memcpy(idx_global, idx, entriesOf[0]*sizeof(index_t));
        delete[] idx;
        MPI_Waitall(numRanks-1, &requests[1], &statuses[0]);
        mpi_info->incCounter(numRanks);
        delete[] rowsOf;

        // ---- stage 3: build the matrix and gather the values ----
        // ptr_global and idx_global are not released in this function -
        // presumably the Pattern takes ownership; confirm.
        const index_t rowBlockSize = mainBlock->row_block_size;
        const index_t colBlockSize = mainBlock->col_block_size;
        Pattern_ptr pat(new Pattern(mainBlock->pattern->type,
                        global_n, global_n, ptr_global, idx_global));
        SparseMatrix_ptr out(new SparseMatrix(mainBlock->type, pat,
                             rowBlockSize, colBlockSize, false));

        index_t valCursor = entriesOf[0] * block_size;
        for (index_t r=1; r<numRanks; r++) {
            const index_t count = entriesOf[r];
            MPI_Irecv(&out->val[valCursor], count * block_size, MPI_DOUBLE, r,
                      mpi_info->counter()+r, mpi_info->comm, &requests[r]);
            valCursor += count*block_size;
        }
        memcpy(out->val, val, entriesOf[0] * sizeof(double) * block_size);
        delete[] val;
        mpi_info->incCounter(numRanks);
        MPI_Waitall(numRanks-1, &requests[1], &statuses[0]);
        delete[] entriesOf;
        return out;
    } // rank
#endif

    return SparseMatrix_ptr();
}