int main (int argc, char *argv[]) { MPI_Init (&argc, &argv); int nProc, iProc; MPI_Comm_rank (MPI_COMM_WORLD, &iProc); MPI_Comm_size (MPI_COMM_WORLD, &nProc); // number of threads const int NTHREADS = 1; // number of buffers const int NWAY = 2; // left neighbour const int left = LEFT(iProc, nProc); // right neighbour const int right = RIGHT(iProc, nProc); // allocate array of for local vector, left halo and right halo double* array = malloc (NWAY * (NTHREADS+2) * 2 * VLEN * sizeof (double)); ASSERT (array != 0); // initial buffer id int buffer_id = 0; // initialize data data_init (NTHREADS, iProc, buffer_id, array); MPI_Barrier (MPI_COMM_WORLD); double time = -now(); for (int k = 0; k < NITER; ++k) { for (int i = 0; i < nProc; ++i) { MPI_Request send_req[2]; MPI_Request recv_req[2]; int slice_id = 1; int left_halo = 0; int right_halo = 2; // post recv MPI_Irecv ( &array_ELEM_right (buffer_id, left_halo, 0), VLEN, MPI_DOUBLE , left, i, MPI_COMM_WORLD, &recv_req[0]); MPI_Irecv ( &array_ELEM_left (buffer_id, right_halo, 0), VLEN, MPI_DOUBLE , right, i, MPI_COMM_WORLD, &recv_req[1]); // issue send MPI_Isend ( &array_ELEM_right (buffer_id, right_halo - 1, 0), VLEN, MPI_DOUBLE , right, i, MPI_COMM_WORLD, &send_req[0]); MPI_Isend ( &array_ELEM_left (buffer_id, left_halo + 1, 0), VLEN, MPI_DOUBLE , left, i, MPI_COMM_WORLD, &send_req[1]); // wait for recv MPI_Waitall (2, recv_req, MPI_STATUSES_IGNORE); // compute data, read from id "buffer_id", write to id "1 - buffer_id" data_compute (NTHREADS, array, 1 - buffer_id, buffer_id, slice_id); // wait for send MPI_Waitall (2, send_req, MPI_STATUSES_IGNORE); // alternate the buffer buffer_id = 1 - buffer_id; } } time += now(); data_verify (NTHREADS, iProc, (NITER * nProc) % NWAY, array); printf ("# mpi %s nProc %d vlen %i niter %d nthreads %i nway %i time %g\n" , argv[0], nProc, VLEN, NITER, NTHREADS, NWAY, time ); MPI_Finalize(); free (array); return EXIT_SUCCESS; }
/*
 * Zoltan_Comm_Do_Post: start the communication described by "plan".
 *
 * Posts one MPI_Irecv per remote source (requests are stored in
 * plan->request[]), synchronises all ranks with an MPI_Allreduce, and then
 * pushes the outgoing data with ready-mode sends (MPI_Rsend).  Data destined
 * for this rank itself is copied locally.  The receives are NOT waited on in
 * this function; presumably a companion wait routine completes them and
 * releases plan->recv_buff -- confirm against the rest of the file.
 *
 * If plan->maxed_recvs is set the whole exchange is delegated to
 * Zoltan_Comm_Do_AlltoAll and its status is returned.
 *
 * Returns:
 *   ZOLTAN_FATAL   plan is NULL, send_data/recv_data is NULL although
 *                  non-empty messages are planned, or nbytes is negative.
 *   ZOLTAN_MEMERR  any rank in plan->comm failed to allocate a buffer.
 *   ZOLTAN_OK      receives posted and all sends issued.
 */
int Zoltan_Comm_Do_Post(
ZOLTAN_COMM_OBJ * plan,		/* communication data structure */
int tag,			/* message tag for communicating */
char *send_data,		/* array of data I currently own */
int nbytes,			/* multiplier for sizes */
char *recv_data)		/* array of data I'll own after comm */
{
    char     *send_buff;	/* space to buffer outgoing data */
    int       my_proc;		/* processor ID */
    unsigned int self_recv_address = 0;/* where in recv_data self info starts */
    int       self_num=0;	/* where in send list my_proc appears */
    int       offset;		/* offset into array I'm copying into */
    int       self_index = 0;	/* send offset for data I'm keeping */
    int       out_of_mem;	/* am I out of memory? */
    int       nblocks;		/* number of procs who need my data */
    int       proc_index;	/* loop counter over procs to send to */
    int       i, j, k, jj;	/* loop counters */

    static char *yo = "Zoltan_Comm_Do_Post";


    /* Check input parameters */
    if (!plan) {
        /* No plan means no plan->comm, so the rank for the message is taken
           from MPI_COMM_WORLD. */
        MPI_Comm_rank(MPI_COMM_WORLD, &my_proc);
	ZOLTAN_COMM_ERROR("Communication plan = NULL", yo, my_proc);
	return ZOLTAN_FATAL;
    }

    /* If not point to point, currently we do synchronous communications */
    if (plan->maxed_recvs){
      int status;
      status = Zoltan_Comm_Do_AlltoAll(plan, send_data, nbytes, recv_data);
      return (status);
    }

    MPI_Comm_rank(plan->comm, &my_proc);

    /* A NULL send_data is only acceptable when every planned message is empty. */
    if ((plan->nsends + plan->self_msg) && !send_data) {
        int sum = 0;
        if (plan->sizes_to)   /* Not an error if all sizes_to == 0 */
            for (i = 0; i < (plan->nsends + plan->self_msg); i++)
                sum += plan->sizes_to[i];
        if (!plan->sizes_to || (plan->sizes_to && sum)) {
            ZOLTAN_COMM_ERROR("nsends not zero, but send_data = NULL",
                              yo, my_proc);
            return ZOLTAN_FATAL;
        }
    }
    /* Same rule for the receive side. */
    if ((plan->nrecvs + plan->self_msg) && !recv_data) {
        int sum = 0;
        if (plan->sizes_from)   /* Not an error if all sizes_from == 0 */
            for (i = 0; i < (plan->nrecvs + plan->self_msg); i++)
                sum += plan->sizes_from[i];
        if (!plan->sizes_from || (plan->sizes_from && sum)) {
            ZOLTAN_COMM_ERROR("nrecvs not zero, but recv_data = NULL",
                              yo, my_proc);
            return ZOLTAN_FATAL;
        }
    }
    if (nbytes < 0) {
	ZOLTAN_COMM_ERROR("Scale factor nbytes is negative", yo, my_proc);
	return ZOLTAN_FATAL;
    }


    /* Post irecvs */

    out_of_mem = 0;

    if (plan->indices_from == NULL) {
	/* Data can go directly into user space. */
	plan->recv_buff = recv_data;
    }
    else {			/* Need to buffer receive to reorder */
        /* This buffer stays attached to the plan on the success path; it is
           only released here when the collective memory check fails. */
	plan->recv_buff = (char *) ZOLTAN_MALLOC(plan->total_recv_size * nbytes);
	if (plan->recv_buff == NULL && plan->total_recv_size * nbytes != 0)
	    out_of_mem = 1;
    }

    if (!out_of_mem) {
	if (plan->sizes == NULL) {	/* All data the same size */
	    /* k counts remote sources only; the self entry gets no request. */
	    k = 0;
	    for (i = 0; i < plan->nrecvs + plan->self_msg; i++) {
		if (plan->procs_from[i] != my_proc) {
		    MPI_Irecv((void *) & plan->recv_buff[plan->starts_from[i] * nbytes],
			      plan->lengths_from[i] * nbytes,
			      (MPI_Datatype) MPI_BYTE, plan->procs_from[i], tag,
			      plan->comm, &plan->request[k]);
		    k++;
		}
		else {
		    /* Remember where the locally copied data has to land. */
		    self_recv_address = plan->starts_from[i] * nbytes;
		}
	    }
	}

	else {			/* Data of varying sizes */
	    k = 0;
	    for (i = 0; i < plan->nrecvs + plan->self_msg; i++) {
		if (plan->procs_from[i] != my_proc) {
		    if (plan->sizes_from[i])
		        MPI_Irecv((void *)
			      &plan->recv_buff[plan->starts_from_ptr[i] * nbytes],
			      plan->sizes_from[i] * nbytes,
			      (MPI_Datatype) MPI_BYTE, plan->procs_from[i], tag,
			      plan->comm, &plan->request[k]);
		    else
		        /* Empty message: keep the slot but mark it as a null
		           request so the request array stays dense. */
		        plan->request[k] = MPI_REQUEST_NULL;
		    k++;
		}
		else {
		    self_recv_address = plan->starts_from_ptr[i] * nbytes;
		}
	    }
	}
    }


    /* Do remaining allocation to check for any mem problems. */
    if (plan->indices_to != NULL) {	/* can't send straight from input */
	send_buff = (char *) ZOLTAN_MALLOC(plan->max_send_size * nbytes);
	if (send_buff == 0 && plan->max_send_size * nbytes != 0)
	    out_of_mem = 1;
    }
    else
	send_buff = NULL;

    /* Barrier to ensure irecvs are posted before doing any sends. */
    /* Simultaneously see if anyone out of memory */
    /* The sends below are ready-mode (MPI_Rsend), which is only valid once
       the matching receive has been posted -- hence this collective. */

    MPI_Allreduce(&out_of_mem, &j, 1, MPI_INT, MPI_SUM, plan->comm);

    if (j > 0) {		/* Some proc is out of memory -> Punt */
	ZOLTAN_FREE(&send_buff);
	if (plan->indices_from != NULL)
	    ZOLTAN_FREE(&plan->recv_buff);
	return (ZOLTAN_MEMERR);
    }

    /* Send out data */

    /* Scan through procs_to list to start w/ higher numbered procs */
    /* This should balance message traffic. */

    nblocks = plan->nsends + plan->self_msg;
    proc_index = 0;
    while (proc_index < nblocks && plan->procs_to[proc_index] < my_proc)
	proc_index++;
    if (proc_index == nblocks)
	proc_index = 0;

    if (plan->sizes == NULL) {	/* Data all of same size */
	if (plan->indices_to == NULL) {	/* data already blocked by processor. */
	    /* i walks the destination list circularly starting at proc_index;
	       j only counts the nblocks iterations. */
	    for (i = proc_index, j = 0; j < nblocks; j++) {
		if (plan->procs_to[i] != my_proc) {
		    MPI_Rsend((void *) &send_data[plan->starts_to[i] * nbytes],
			      plan->lengths_to[i] * nbytes,
			      (MPI_Datatype) MPI_BYTE, plan->procs_to[i], tag,
			      plan->comm);
		}
		else
		    self_num = i;
		if (++i == nblocks)
		    i = 0;
	    }

	    if (plan->self_msg) {	/* Copy data to self. */
		/* I use array+offset instead of &(array[offset]) because of
		   a bug with PGI v9 */
		/* I use memmove because I'm not sure that the pointer are not
		   overlapped. */
		memmove(plan->recv_buff+self_recv_address,
			send_data+plan->starts_to[self_num] * nbytes,
			plan->lengths_to[self_num]*nbytes);
	    }
	}

	else {			/* Not blocked by processor.  Need to buffer. */
	    for (i = proc_index, jj = 0; jj < nblocks; jj++) {
		if (plan->procs_to[i] != my_proc) {
		    /* Need to pack message first. */
		    /* Gather the items for this destination, in indices_to
		       order, into the contiguous send_buff. */
		    offset = 0;
		    j = plan->starts_to[i];
		    for (k = 0; k < plan->lengths_to[i]; k++) {
			memcpy(&send_buff[offset],
			       &send_data[plan->indices_to[j++] * nbytes], nbytes);
			offset += nbytes;
		    }
		    /* MPI_Rsend is a blocking call, so send_buff can be
		       reused for the next destination once it returns. */
		    MPI_Rsend((void *) send_buff, plan->lengths_to[i] * nbytes,
		      (MPI_Datatype) MPI_BYTE, plan->procs_to[i], tag, plan->comm);
		}
		else {
		    self_num = i;
		    self_index = plan->starts_to[i];
		}
		if (++i == nblocks)
		    i = 0;
	    }
	    if (plan->self_msg) {	/* Copy data to self. */
		/* Item-by-item copy straight into the receive buffer; no
		   staging through send_buff is needed for local data. */
		for (k = 0; k < plan->lengths_to[self_num]; k++) {
		    memcpy(&plan->recv_buff[self_recv_address],
		      &send_data[plan->indices_to[self_index++] * nbytes], nbytes);
		    self_recv_address += nbytes;
		}
	    }

	    ZOLTAN_FREE(&send_buff);
	}
    }
    else {			/* Data of differing sizes */
	if (plan->indices_to == NULL) {	/* data already blocked by processor. */
	    for (i = proc_index, j = 0; j < nblocks; j++) {

		if (plan->procs_to[i] != my_proc) {
		    /* Zero-sized messages are skipped; the receiver stored
		       MPI_REQUEST_NULL for the matching slot. */
		    if (plan->sizes_to[i]) {
		        MPI_Rsend((void *) &send_data[plan->starts_to_ptr[i] * nbytes],
			      plan->sizes_to[i] * nbytes,
			      (MPI_Datatype) MPI_BYTE, plan->procs_to[i], tag,
			      plan->comm);
		    }
		}
		else
		    self_num = i;
		if (++i == nblocks)
		    i = 0;
	    }

	    if (plan->self_msg) {	/* Copy data to self. */
		if (plan->sizes_to[self_num]) {
		    char* lrecv = &plan->recv_buff[self_recv_address];
		    char* lsend = &send_data[plan->starts_to_ptr[self_num] * nbytes];
		    int sindex = plan->sizes_to[self_num], idx;
		    /* Copies sindex bytes nbytes times, i.e. sizes_to * nbytes
		       bytes in total, in chunks of sindex. */
		    for (idx=0; idx<nbytes; idx++) {
		        memcpy(lrecv, lsend, sindex);
		        lrecv += sindex;
		        lsend += sindex;
		    }
		}
	    }
	}

	else {			/* Not blocked by processor.  Need to buffer. */
	    for (i = proc_index, jj = 0; jj < nblocks; jj++) {
		if (plan->procs_to[i] != my_proc) {
		    /* Need to pack message first. */
		    offset = 0;
		    j = plan->starts_to[i];
		    for (k = 0; k < plan->lengths_to[i]; k++) {
			/* Each item has its own size (plan->sizes) and its
			   own start position (plan->indices_to_ptr). */
			if (plan->sizes[plan->indices_to[j]]) {
			    memcpy(&send_buff[offset],
				   &send_data[plan->indices_to_ptr[j] * nbytes],
				   plan->sizes[plan->indices_to[j]] * nbytes);
			    offset += plan->sizes[plan->indices_to[j]] * nbytes;
			}
			j++;
		    }
		    if (plan->sizes_to[i]) {
		        MPI_Rsend((void *) send_buff, plan->sizes_to[i] * nbytes,
		          (MPI_Datatype) MPI_BYTE, plan->procs_to[i], tag, plan->comm);
		    }
		}
		else
		    self_num = i;
		if (++i == nblocks)
		    i = 0;
	    }
	    if (plan->self_msg) {	/* Copy data to self. */
		if (plan->sizes_to[self_num]) {
		    j = plan->starts_to[self_num];
		    for (k = 0; k < plan->lengths_to[self_num]; k++) {
		        int kk = plan->indices_to_ptr[j];
		        char* lrecv = &plan->recv_buff[self_recv_address];
		        unsigned int send_idx = kk * nbytes;
		        char* lsend = &send_data[send_idx];
		        int sindex = plan->sizes[plan->indices_to[j]], idx;
		        /* As above: nbytes chunks of sindex bytes each. */
		        for (idx=0; idx<nbytes; idx++) {
		            memcpy(lrecv, lsend, sindex);
		            lrecv += sindex;
		            lsend += sindex;
		        }
		        self_recv_address += plan->sizes[plan->indices_to[j]] * nbytes;
		        j++;
		    }
		}
	    }

	    ZOLTAN_FREE(&send_buff);
	}
    }

    return (ZOLTAN_OK);
}
void peano::applications::poisson::multigrid::records::RegularGridCellPacked::receive(int source, int tag) { MPI_Request* sendRequestHandle = new MPI_Request(); MPI_Status status; int flag = 0; int result; clock_t timeOutWarning = -1; clock_t timeOutShutdown = -1; bool triggeredTimeoutWarning = false; result = MPI_Irecv( this, 1, Datatype, source, tag, tarch::parallel::Node::getInstance().getCommunicator(), sendRequestHandle ); if ( result != MPI_SUCCESS ) { std::ostringstream msg; msg << "failed to start to receive peano::applications::poisson::multigrid::records::RegularGridCellPacked from node " << source << ": " << tarch::parallel::MPIReturnValueToString(result); _log.error( "receive(int)", msg.str() ); } result = MPI_Test( sendRequestHandle, &flag, &status ); while (!flag) { if (timeOutWarning==-1) timeOutWarning = tarch::parallel::Node::getInstance().getDeadlockWarningTimeStamp(); if (timeOutShutdown==-1) timeOutShutdown = tarch::parallel::Node::getInstance().getDeadlockTimeOutTimeStamp(); result = MPI_Test( sendRequestHandle, &flag, &status ); if (result!=MPI_SUCCESS) { std::ostringstream msg; msg << "testing for finished receive task for peano::applications::poisson::multigrid::records::RegularGridCellPacked failed: " << tarch::parallel::MPIReturnValueToString(result); _log.error("receive(int)", msg.str() ); } // deadlock aspect if ( tarch::parallel::Node::getInstance().isTimeOutWarningEnabled() && (clock()>timeOutWarning) && (!triggeredTimeoutWarning) ) { tarch::parallel::Node::getInstance().writeTimeOutWarning( "peano::applications::poisson::multigrid::records::RegularGridCellPacked", "receive(int)", source ); triggeredTimeoutWarning = true; } if ( tarch::parallel::Node::getInstance().isTimeOutDeadlockEnabled() && (clock()>timeOutShutdown) ) { tarch::parallel::Node::getInstance().triggerDeadlockTimeOut( "peano::applications::poisson::multigrid::records::RegularGridCellPacked", "receive(int)", source ); } 
tarch::parallel::Node::getInstance().receiveDanglingMessages(); } delete sendRequestHandle; _senderRank = status.MPI_SOURCE; #ifdef Debug _log.debug("receive(int,int)", "received " + toString() ); #endif }
int main(int argc, char** argv) { int rank, size; int arraySize, chunkSize; int *array, *chunk, *y; int localHigh, i, elements; double start, stop; MPI_Request *requests; MPI_Status *status; if (MPI_Init(&argc, &argv) != MPI_SUCCESS) { fprintf(stderr, "Unable to initialize MPI!\n"); return -1; } if(argc != 2 || (elements = atoi(argv[1])) == 0) { fprintf(stdout, "No element count parameter given!\n"); return -1; } // get rank and size from communicator MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Barrier(MPI_COMM_WORLD); arraySize = elements - elements % size; chunkSize = arraySize / size; if (rank == 0) { requests = (MPI_Request*) malloc(sizeof (MPI_Request) * (size - 1)); status = (MPI_Status*) malloc(sizeof (MPI_Status) * (size - 1)); array = (int*) malloc(sizeof (int) * arraySize); init(array, arraySize); start = MPI_Wtime(); } y = (int*) malloc(sizeof (int) * size); chunk = (int*) malloc(sizeof (int) * chunkSize); MPI_Scatter(array, chunkSize, MPI_INT, chunk, chunkSize, MPI_INT, 0, MPI_COMM_WORLD); // Compute local prefix sums localHigh = scanIterativeLocal(chunk, chunkSize); if (rank != 0) { MPI_Send(&localHigh, 1, MPI_INT, 0, PREFIXSUM_TAG, MPI_COMM_WORLD); } else { y[0] = localHigh; for (i = 1; i < size; i++) { MPI_Irecv(&y[i], 1, MPI_INT, i, PREFIXSUM_TAG, MPI_COMM_WORLD, &requests[i - 1]); } MPI_Waitall(size - 1, requests, status); } MPI_Bcast(y, size, MPI_INT, 0, MPI_COMM_WORLD); if (rank != 0) { sumIterativeLocal(chunk, y, rank, chunkSize); } MPI_Gather(chunk, chunkSize, MPI_INT, array, chunkSize, MPI_INT, 0, MPI_COMM_WORLD); if (rank == 0) { stop = MPI_Wtime(); fprintf(stdout, "%d;%d;%f\n", size, arraySize, stop - start); verify(array, arraySize); } free(chunk); if (rank == 0) { free(requests); free(array); } MPI_Finalize(); fflush(stdout); return (EXIT_SUCCESS); }
/*
  MatIncreaseOverlap_MPISBAIJ_Once - one round of overlap increase for the
  index sets is[0..is_max-1] of the parallel SBAIJ matrix C.

  Outline (see the numbered sections in the body):
    1. pack this process's is[] per destination and send it (tag1);
    2. receive other processes' is[], process them with
       MatIncreaseOverlap_MPISBAIJ_Local(...,OTHER,...) and send the result
       back (tag2);
    3. process the local is[] with MatIncreaseOverlap_MPISBAIJ_Local(...,MINE,...);
    4. receive the work done elsewhere and merge it, skipping duplicates;
    5. create the new is[].
  The incoming is[] are destroyed in step 1 and replaced in step 5 by index
  sets created on PETSC_COMM_SELF.

  data1, odata1 and odata2 are packed in the format (for communication):
       data[0]          = is_max, no of is
       data[1]          = size of is[0]
        ...
       data[is_max]     = size of is[is_max-1]
       data[is_max + 1] = data(is[0])
        ...
       data[is_max+1+sum(size of is[k]), k=0,...,i-1] = data(is[i])
        ...
   data2 is packed in the format (for creating output is[]):
       data[0]          = is_max, no of is
       data[1]          = size of is[0]
        ...
       data[is_max]     = size of is[is_max-1]
       data[is_max + 1] = data(is[0])
        ...
       data[is_max + 1 + Mbs*i) = data(is[i])
        ...
*/
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C,PetscInt is_max,IS is[])
{
  Mat_MPISBAIJ   *c = (Mat_MPISBAIJ*)C->data;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len,*iwork;
  const PetscInt *idx_i;
  PetscInt       idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i;
  PetscInt       Mbs,i,j,k,*odata1,*odata2;
  PetscInt       proc_id,**odata2_ptr,*ctable=0,*btable,len_max,len_est;
  PetscInt       proc_end=0,len_unused,nodata2;
  PetscInt       ois_max; /* max no of is[] in each of processor */
  char           *t_p;
  MPI_Comm       comm;
  MPI_Request    *s_waits1,*s_waits2,r_req;
  MPI_Status     *s_status,r_status;
  PetscBT        *table;  /* mark indices of this processor's is[] */
  PetscBT        table_i;
  PetscBT        otable; /* mark indices of other processors' is[] */
  PetscInt       bs=C->rmap->bs,Bn = c->B->cmap->n,Bnbs = Bn/bs,*Bowners;
  IS             garray_local,garray_gl;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)C,&comm); CHKERRQ(ierr);
  size = c->size;
  rank = c->rank;
  Mbs  = c->Mbs;

  /* Two distinct tags: tag1 for the outgoing is[] data, tag2 for the replies. */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1); CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag2); CHKERRQ(ierr);

  /* create tables used in
     step 1: table[i] - mark c->garray of proc [i]
     step 3: table[i] - mark indices of is[i] when whose=MINE
             table[0] - mark indices of is[] when whose=OTHER */
  /* NOTE(review): PetscMax does not assign ierr, so the CHKERRQ on the next
     line only re-checks the value left by the previous call. */
  len  = PetscMax(is_max, size); CHKERRQ(ierr);
  /* One bit table of Mbs bits per entry, all carved out of the single block t_p. */
  ierr = PetscMalloc2(len,&table,(Mbs/PETSC_BITS_PER_BYTE+1)*len,&t_p); CHKERRQ(ierr);
  for (i=0; i<len; i++) {
    table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i;
  }

  ierr = MPIU_Allreduce(&is_max,&ois_max,1,MPIU_INT,MPI_MAX,comm); CHKERRQ(ierr);

  /* 1. Send this processor's is[] to other processors */
  /*---------------------------------------------------*/
  /* allocate spaces */
  ierr = PetscMalloc1(is_max,&n); CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<is_max; i++) {
    ierr = ISGetLocalSize(is[i],&n[i]); CHKERRQ(ierr);
    len += n[i];
  }
  if (!len) {
    /* All local index sets are empty: treat this process as having none. */
    is_max = 0;
  } else {
    len += 1 + is_max; /* max length of data1 for one processor */
  }


  /* data1 holds one slot of length len per destination process;
     data1_start[i] is the write cursor into slot i. */
  ierr = PetscMalloc1(size*len+1,&data1); CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&data1_start); CHKERRQ(ierr);
  for (i=0; i<size; i++) data1_start[i] = data1 + i*len;

  ierr = PetscMalloc4(size,&len_s,size,&btable,size,&iwork,size+1,&Bowners); CHKERRQ(ierr);

  /* gather c->garray from all processors */
  ierr = ISCreateGeneral(comm,Bnbs,c->garray,PETSC_COPY_VALUES,&garray_local); CHKERRQ(ierr);
  ierr = ISAllGather(garray_local, &garray_gl); CHKERRQ(ierr);
  ierr = ISDestroy(&garray_local); CHKERRQ(ierr);
  /* Turn the per-process counts into offsets: Bowners[i]..Bowners[i+1] is the
     range of process i inside the gathered garray. */
  ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm); CHKERRQ(ierr);

  Bowners[0] = 0;
  for (i=0; i<size; i++) Bowners[i+1] += Bowners[i];

  if (is_max) {
    /* hash table ctable which maps c->row to proc_id) */
    ierr = PetscMalloc1(Mbs,&ctable); CHKERRQ(ierr);
    for (proc_id=0,j=0; proc_id<size; proc_id++) {
      for (; j<C->rmap->range[proc_id+1]/bs; j++) ctable[j] = proc_id;
    }

    /* hash tables marking c->garray */
    ierr = ISGetIndices(garray_gl,&idx_i); CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      table_i = table[i];
      ierr    = PetscBTMemzero(Mbs,table_i); CHKERRQ(ierr);
      for (j = Bowners[i]; j<Bowners[i+1]; j++) { /* go through B cols of proc[i]*/
        ierr = PetscBTSet(table_i,idx_i[j]); CHKERRQ(ierr);
      }
    }
    ierr = ISRestoreIndices(garray_gl,&idx_i); CHKERRQ(ierr);
  }  /* if (is_max) */
  ierr = ISDestroy(&garray_gl); CHKERRQ(ierr);

  /* evaluate communication - mesg to who, length, and buffer space */
  for (i=0; i<size; i++) len_s[i] = 0;

  /* header of data1 */
  /* The local slot gets the true sizes n[j]; remote slots start at 0 and are
     filled in by the "update header data" loop below. */
  for (proc_id=0; proc_id<size; proc_id++) {
    iwork[proc_id]        = 0;
    *data1_start[proc_id] = is_max;
    data1_start[proc_id]++;
    for (j=0; j<is_max; j++) {
      if (proc_id == rank) {
        *data1_start[proc_id] = n[j];
      } else {
        *data1_start[proc_id] = 0;
      }
      data1_start[proc_id]++;
    }
  }

  for (i=0; i<is_max; i++) {
    ierr = ISGetIndices(is[i],&idx_i); CHKERRQ(ierr);
    for (j=0; j<n[i]; j++) {
      idx                = idx_i[j];
      *data1_start[rank] = idx; data1_start[rank]++; /* for local processing */
      /* proc_end is the process ctable assigns idx to; only processes up to
         and including it are candidates. */
      proc_end           = ctable[idx];
      for (proc_id=0; proc_id<=proc_end; proc_id++) {  /* for others to process */
        if (proc_id == rank) continue; /* done before this loop */
        if (proc_id < proc_end && !PetscBTLookup(table[proc_id],idx)) continue; /* no need for sending idx to [proc_id] */
        *data1_start[proc_id] = idx; data1_start[proc_id]++;
        len_s[proc_id]++;
      }
    }
    /* update header data */
    /* iwork[] remembers len_s before is[i], so the difference is the number
       of entries of is[i] queued for each destination. */
    for (proc_id=0; proc_id<size; proc_id++) {
      if (proc_id== rank) continue;
      *(data1 + proc_id*len + 1 + i) = len_s[proc_id] - iwork[proc_id];
      iwork[proc_id]                 = len_s[proc_id];
    }
    ierr = ISRestoreIndices(is[i],&idx_i); CHKERRQ(ierr);
  }

  nrqs = 0; nrqr = 0;
  for (i=0; i<size; i++) {
    /* rewind the cursors to the start of each slot */
    data1_start[i] = data1 + i*len;
    if (len_s[i]) {
      nrqs++;
      len_s[i] += 1 + is_max; /* add no. of header msg */
    }
  }

  /* The incoming index sets are no longer needed; new ones are created in step 5. */
  for (i=0; i<is_max; i++) {
    ierr = ISDestroy(&is[i]); CHKERRQ(ierr);
  }
  ierr = PetscFree(n); CHKERRQ(ierr);
  ierr = PetscFree(ctable); CHKERRQ(ierr);

  /* Determine the number of messages to expect, their lengths, from from-ids */
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&nrqr); CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,len_s,&id_r1,&len_r1); CHKERRQ(ierr);

  /*  Now  post the sends */
  ierr = PetscMalloc2(size,&s_waits1,size,&s_waits2); CHKERRQ(ierr);
  k    = 0;
  for (proc_id=0; proc_id<size; proc_id++) {  /* send data1 to processor [proc_id] */
    if (len_s[proc_id]) {
      ierr = MPI_Isend(data1_start[proc_id],len_s[proc_id],MPIU_INT,proc_id,tag1,comm,s_waits1+k); CHKERRQ(ierr);
      k++;
    }
  }

  /* 2. Receive other's is[] and process. Then send back */
  /*-----------------------------------------------------*/
  /* odata1 is sized for the longest incoming message */
  len = 0;
  for (i=0; i<nrqr; i++) {
    if (len_r1[i] > len) len = len_r1[i];
  }
  ierr = PetscFree(len_r1); CHKERRQ(ierr);
  ierr = PetscFree(id_r1); CHKERRQ(ierr);

  for (proc_id=0; proc_id<size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0;

  ierr = PetscMalloc1(len+1,&odata1); CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&odata2_ptr); CHKERRQ(ierr);
  ierr = PetscBTCreate(Mbs,&otable); CHKERRQ(ierr);

  len_max = ois_max*(Mbs+1); /* max space storing all is[] for each receive */
  len_est = 2*len_max;       /* estimated space of storing is[] for all receiving messages */
  ierr    = PetscMalloc1(len_est+1,&odata2); CHKERRQ(ierr);
  nodata2 = 0;               /* nodata2+1: num of PetscMalloc(,&odata2_ptr[]) called */

  /* odata2_ptr[] records every chunk allocated for odata2 so that all of them
     can be freed at the end; odata2 itself is advanced as a cursor. */
  odata2_ptr[nodata2] = odata2;

  len_unused = len_est; /* unused space in the array odata2_ptr[nodata2]-- needs to be >= len_max  */

  k = 0;
  while (k < nrqr) {
    /* Receive messages */
    /* Poll for any sender; messages are handled in arrival order. */
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status); CHKERRQ(ierr);
    if (flag) {
      ierr    = MPI_Get_count(&r_status,MPIU_INT,&len); CHKERRQ(ierr);
      proc_id = r_status.MPI_SOURCE;
      ierr    = MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req); CHKERRQ(ierr);
      ierr    = MPI_Wait(&r_req,&r_status); CHKERRQ(ierr);

      /*  Process messages */
      /*  make sure there is enough unused space in odata2 array */
      if (len_unused < len_max) { /* allocate more space for odata2 */
        ierr = PetscMalloc1(len_est+1,&odata2); CHKERRQ(ierr);

        odata2_ptr[++nodata2] = odata2;

        len_unused = len_est;
      }

      ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable); CHKERRQ(ierr);
      /* reply length = header (1 + number of is) + all is sizes */
      len  = 1 + odata2[0];
      for (i=0; i<odata2[0]; i++) len += odata2[1 + i];

      /* Send messages back */
      ierr = MPI_Isend(odata2,len,MPIU_INT,proc_id,tag2,comm,s_waits2+k); CHKERRQ(ierr);
      k++;
      /* the reply stays in place until the send completes; move the cursor past it */
      odata2        += len;
      len_unused    -= len;
      len_s[proc_id] = len; /* num of messages sending back to [proc_id] by this proc */
    }
  }
  ierr = PetscFree(odata1); CHKERRQ(ierr);
  ierr = PetscBTDestroy(&otable); CHKERRQ(ierr);

  /* 3. Do local work on this processor's is[] */
  /*-------------------------------------------*/
  /* make sure there is enough unused space in odata2(=data) array */
  len_max = is_max*(Mbs+1); /* max space storing all is[] for this processor */
  if (len_unused < len_max) { /* allocate more space for odata2 */
    ierr = PetscMalloc1(len_est+1,&odata2); CHKERRQ(ierr);

    odata2_ptr[++nodata2] = odata2;
  }

  /* data uses the data2-style layout from the header comment: is[i] occupies
     a fixed window of Mbs entries. */
  data = odata2;
  ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,data1_start[rank],MINE,data,table); CHKERRQ(ierr);
  ierr = PetscFree(data1_start); CHKERRQ(ierr);

  /* 4. Receive work done on other processors, then merge */
  /*------------------------------------------------------*/
  /* get max number of messages that this processor expects to recv */
  /* After the reduction iwork[rank] is the longest reply addressed to this rank. */
  ierr = MPIU_Allreduce(len_s,iwork,size,MPI_INT,MPI_MAX,comm); CHKERRQ(ierr);
  ierr = PetscMalloc1(iwork[rank]+1,&data2); CHKERRQ(ierr);
  ierr = PetscFree4(len_s,btable,iwork,Bowners); CHKERRQ(ierr);

  k = 0;
  while (k < nrqs) {
    /* Receive messages */
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status); CHKERRQ(ierr);
    if (flag) {
      ierr = MPI_Get_count(&r_status,MPIU_INT,&len); CHKERRQ(ierr);

      proc_id = r_status.MPI_SOURCE;

      ierr = MPI_Irecv(data2,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req); CHKERRQ(ierr);
      ierr = MPI_Wait(&r_req,&r_status); CHKERRQ(ierr);
      /* a reply that is only a header carries nothing to merge */
      if (len > 1+is_max) { /* Add data2 into data */
        data2_i = data2 + 1 + is_max;
        for (i=0; i<is_max; i++) {
          table_i = table[i];
          data_i  = data + 1 + is_max + Mbs*i;
          isz     = data[1+i];
          for (j=0; j<data2[1+i]; j++) {
            col = data2_i[j];
            /* PetscBTLookupSet marks col and reports whether it was already
               present, so each column is appended at most once. */
            if (!PetscBTLookupSet(table_i,col)) data_i[isz++] = col;
          }
          data[1+i] = isz;
          if (i < is_max - 1) data2_i += data2[1+i];
        }
      }
      k++;
    }
  }
  ierr = PetscFree(data2); CHKERRQ(ierr);
  ierr = PetscFree2(table,t_p); CHKERRQ(ierr);

  /* phase 1 sends are complete */
  ierr = PetscMalloc1(size,&s_status); CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status); CHKERRQ(ierr);}
  ierr = PetscFree(data1); CHKERRQ(ierr);

  /* phase 2 sends are complete */
  if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status); CHKERRQ(ierr);}
  ierr = PetscFree2(s_waits1,s_waits2); CHKERRQ(ierr);
  ierr = PetscFree(s_status); CHKERRQ(ierr);

  /* 5. Create new is[] */
  /*--------------------*/
  for (i=0; i<is_max; i++) {
    data_i = data + 1 + is_max + Mbs*i;
    ierr   = ISCreateGeneral(PETSC_COMM_SELF,data[1+i],data_i,PETSC_COPY_VALUES,is+i); CHKERRQ(ierr);
  }
  /* the index values were copied above, so every odata2 chunk can be released */
  for (k=0; k<=nodata2; k++) {
    ierr = PetscFree(odata2_ptr[k]); CHKERRQ(ierr);
  }
  ierr = PetscFree(odata2_ptr); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
 * Exchange ONE scalar component (real or imaginary part) of Mat among all
 * ranks of "comm".
 *
 * Rank ID owns rows is1[ID]..ie1[ID] (1-based).  The owned rows are packed
 * into the scratch buffer Mat1 (row-major, N doubles per row), sent to every
 * other rank, and the rows received from the other ranks are written back
 * into Mat.  A zero-length message is exchanged with the rank itself so that
 * request_recv[] / request_send[] are fully populated for MPI_Waitall.
 *
 *   Mat1       scratch buffer provided by the caller
 *   tag        MPI message tag
 *   imaginary  0: operate on Mat[i][j].r, non-zero: operate on Mat[i][j].i
 */
static void BroadCast_ComplexMatrix_Component(MPI_Comm comm, dcomplex **Mat,
                                              double *Mat1, long long int N,
                                              int *is1, int *ie1,
                                              int myid, int numprocs,
                                              MPI_Status *stat_send,
                                              MPI_Request *request_send,
                                              MPI_Request *request_recv,
                                              int tag, int imaginary)
{
  long long int i,j,ID;
  long long int k,k0,k1,num0,num1;

  /* pack the rows owned by this rank */

  for (i=is1[myid]; i<=ie1[myid]; i++){
    for (j=1; j<=N; j++){
      k = (i-1)*N + j - 1;
      Mat1[k] = imaginary ? Mat[i][j].i : Mat[i][j].r;
    }
  }

  /* receiving */

  for (ID=0; ID<numprocs; ID++){
    k1 = (is1[ID]-1)*N;
    if (k1<0) k1 = 0;
    num1 = (ie1[ID] - is1[ID] + 1)*N;
    /* nothing is received for an empty row range or from this rank itself */
    if (num1<0 || ID==myid) num1 = 0;
    MPI_Irecv(&Mat1[k1], (int)num1, MPI_DOUBLE, (int)ID, tag, comm, &request_recv[ID]);
  }

  /* sending */

  k0 = (is1[myid]-1)*N;
  if (k0<0) k0 = 0;
  num0 = (ie1[myid] - is1[myid] + 1)*N;
  if (num0<0) num0 = 0;

  for (ID=0; ID<numprocs; ID++){
    /* zero-length self message matches the zero-length self receive above */
    MPI_Isend(&Mat1[k0], (ID!=myid) ? (int)num0 : 0, MPI_DOUBLE, (int)ID, tag,
              comm, &request_send[ID]);
  }

  /* waitall */

  MPI_Waitall(numprocs,request_recv,stat_send);
  MPI_Waitall(numprocs,request_send,stat_send);

  /* unpack the rows of every rank (own rows are rewritten unchanged) */

  for (ID=0; ID<numprocs; ID++){
    for (i=is1[ID]; i<=ie1[ID]; i++){
      for (j=1; j<=N; j++){
        k = (i-1)*N + j - 1;
        if (imaginary) Mat[i][j].i = Mat1[k];
        else           Mat[i][j].r = Mat1[k];
      }
    }
  }
}

/*
 * Make the full complex matrix Mat available on every rank.
 *
 * Elements are stored from 1 to n in Mat.  Each rank ID initially holds valid
 * data in rows is1[ID]..ie1[ID]; after the call every rank holds all rows.
 * The real and the imaginary parts are exchanged in two separate passes
 * through a scratch buffer of doubles.  Nothing is done for numprocs == 1.
 *
 *   stat_send, request_send, request_recv
 *       caller-provided work arrays; entries 0..numprocs-1 are used.
 *
 * If the scratch buffer cannot be allocated the job is aborted with
 * MPI_Abort, because continuing on one rank would leave the others waiting.
 */
void BroadCast_ComplexMatrix(MPI_Comm MPI_Curret_Comm_WD,
                             dcomplex **Mat, int n, int *is1, int *ie1, int myid, int numprocs,
                             MPI_Status *stat_send,
                             MPI_Request *request_send,
                             MPI_Request *request_recv)
{
  int tag=999;
  long long int N;
  double *Mat1;

  N = n;

  if (numprocs!=1){

    Mat1 = (double*)malloc(sizeof(double)*(size_t)(N+1)*(size_t)(N+1));
    if (Mat1==NULL){
      MPI_Abort(MPI_Curret_Comm_WD, 1);
      return;
    }

    /* Real part of Mat */
    BroadCast_ComplexMatrix_Component(MPI_Curret_Comm_WD, Mat, Mat1, N, is1, ie1,
                                      myid, numprocs, stat_send,
                                      request_send, request_recv, tag, 0);

    /* Imaginary part of Mat */
    BroadCast_ComplexMatrix_Component(MPI_Curret_Comm_WD, Mat, Mat1, N, is1, ie1,
                                      myid, numprocs, stat_send,
                                      request_send, request_recv, tag, 1);

    free(Mat1);
  }
}
int master_main(const char *host, int port, const char *addr) { time_t idle_stoptime; struct link *master = NULL; int num_workers, i; struct mpi_queue_job **workers; struct itable *active_jobs = itable_create(0); struct itable *waiting_jobs = itable_create(0); struct list *complete_jobs = list_create(); MPI_Comm_size(MPI_COMM_WORLD, &num_workers); workers = malloc(num_workers * sizeof(*workers)); memset(workers, 0, num_workers * sizeof(*workers)); idle_stoptime = time(0) + idle_timeout; while(!abort_flag) { char line[MPI_QUEUE_LINE_MAX]; if(time(0) > idle_stoptime) { if(master) { printf("mpi master: gave up after waiting %ds to receive a task.\n", idle_timeout); } else { printf("mpi master: gave up after waiting %ds to connect to %s port %d.\n", idle_timeout, host, port); } break; } if(!master) { char working_dir[MPI_QUEUE_LINE_MAX]; master = link_connect(addr, port, idle_stoptime); if(!master) { sleep(5); continue; } link_tune(master, LINK_TUNE_INTERACTIVE); link_readline(master, line, sizeof(line), time(0) + active_timeout); memset(working_dir, 0, MPI_QUEUE_LINE_MAX); if(sscanf(line, "workdir %s", working_dir) == 1) { MPI_Bcast(working_dir, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD); } else { link_close(master); master = NULL; continue; } } if(link_readline(master, line, sizeof(line), time(0) + short_timeout)) { struct mpi_queue_operation *op; int jobid, mode; INT64_T length; char path[MPI_QUEUE_LINE_MAX]; op = NULL; debug(D_MPI, "received: %s\n", line); if(!strcmp(line, "get results")) { struct mpi_queue_job *job; debug(D_MPI, "results requested: %d available\n", list_size(complete_jobs)); link_putfstring(master, "num results %d\n", time(0) + active_timeout, list_size(complete_jobs)); while(list_size(complete_jobs)) { job = list_pop_head(complete_jobs); link_putfstring(master, "result %d %d %d %lld\n", time(0) + active_timeout, job->jobid, job->status, job->result, job->output_length); if(job->output_length) { link_write(master, job->output, 
job->output_length, time(0)+active_timeout); } mpi_queue_job_delete(job); } } else if(sscanf(line, "work %d %lld", &jobid, &length)) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_WORK; op->buffer_length = length+1; op->buffer = malloc(length+1); op->buffer[op->buffer_length] = 0; link_read(master, op->buffer, length, time(0) + active_timeout); op->result = -1; } else if(sscanf(line, "stat %d %s", &jobid, path) == 2) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_STAT; sprintf(op->args, "%s", path); op->result = -1; } else if(sscanf(line, "unlink %d %s", &jobid, path) == 2) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_UNLINK; sprintf(op->args, "%s", path); op->result = -1; } else if(sscanf(line, "mkdir %d %s %o", &jobid, path, &mode) == 3) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_MKDIR; sprintf(op->args, "%s %o", path, mode); op->result = -1; } else if(sscanf(line, "close %d", &jobid) == 1) { op = malloc(sizeof(*op)); memset(op, 0, sizeof(*op)); op->type = MPI_QUEUE_OP_CLOSE; op->result = -1; // } else if(sscanf(line, "symlink %d %s %s", &jobid, path, filename) == 3) { // } else if(sscanf(line, "put %d %s %lld %o", &jobid, filename, &length, &mode) == 4) { // } else if(sscanf(line, "rget %d %s", &jobid, filename) == 2) { // } else if(sscanf(line, "get %d %s", &jobid, filename) == 2) { // } else if(sscanf(line, "thirdget %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) { // } else if(sscanf(line, "thirdput %d %d %s %[^\n]", &jobid, &mode, filename, path) == 4) { } else if(!strcmp(line, "exit")) { break; } else { abort_flag = 1; continue; } if(op) { struct mpi_queue_job *job; job = itable_lookup(active_jobs, jobid); if(!job) { job = itable_lookup(waiting_jobs, jobid); } if(!job) { job = malloc(sizeof(*job)); memset(job, 0, sizeof(*job)); job->jobid = jobid; job->operations = list_create(); job->status = 
MPI_QUEUE_JOB_WAITING; job->worker_rank = -1; itable_insert(waiting_jobs, jobid, job); } list_push_tail(job->operations, op); } idle_stoptime = time(0) + idle_timeout; } else { link_close(master); master = 0; sleep(5); } int num_waiting_jobs = itable_size(waiting_jobs); int num_unvisited_jobs = itable_size(active_jobs); for(i = 1; i < num_workers && (num_unvisited_jobs > 0 || num_waiting_jobs > 0); i++) { struct mpi_queue_job *job; struct mpi_queue_operation *op; int flag = 0; UINT64_T jobid; if(!workers[i]) { if(num_waiting_jobs) { itable_firstkey(waiting_jobs); itable_nextkey(waiting_jobs, &jobid, (void **)&job); itable_remove(waiting_jobs, jobid); itable_insert(active_jobs, jobid, job); workers[i] = job; num_waiting_jobs--; job->worker_rank = i; job->status = MPI_QUEUE_JOB_READY; } else { continue; } } else { num_unvisited_jobs--; if(workers[i]->status == MPI_QUEUE_JOB_BUSY) { MPI_Test(&workers[i]->request, &flag, &workers[i]->mpi_status); if(flag) { op = list_pop_head(workers[i]->operations); if(op->output_length) { op->output_buffer = malloc(op->output_length); MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status); } workers[i]->status = MPI_QUEUE_JOB_READY; if(op->type == MPI_QUEUE_OP_WORK || op->result < 0) { if(workers[i]->output) free(workers[i]->output); workers[i]->output = op->output_buffer; op->output_buffer = NULL; workers[i]->output_length = op->output_length; workers[i]->result = op->result; if(op->result < 0) { workers[i]->status = MPI_QUEUE_JOB_FAILED | op->type; op->type = MPI_QUEUE_OP_CLOSE; list_push_head(workers[i]->operations, op); op = NULL; } } if(op) { if(op->buffer) free(op->buffer); if(op->output_buffer) free(op->output_buffer); free(op); } } } } if( workers[i]->status != MPI_QUEUE_JOB_BUSY && list_size(workers[i]->operations)) { op = list_peek_head(workers[i]->operations); if(op->type == MPI_QUEUE_OP_CLOSE) { itable_remove(active_jobs, workers[i]->jobid); 
list_push_tail(complete_jobs, workers[i]); if(!(workers[i]->status & MPI_QUEUE_JOB_FAILED)) workers[i]->status = MPI_QUEUE_JOB_COMPLETE; workers[i] = NULL; i--; continue; } MPI_Send(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD); if(op->buffer_length) { MPI_Send(op->buffer, op->buffer_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD); free(op->buffer); op->buffer_length = 0; op->buffer = NULL; } MPI_Irecv(op, sizeof(*op), MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->request); workers[i]->status = MPI_QUEUE_JOB_BUSY; } } } /** Clean up waiting & complete jobs, send Exit commands to each worker */ if(!master) { // If the master link hasn't been set up yet // the workers will be waiting for the working directory char line[MPI_QUEUE_LINE_MAX]; memset(line, 0, MPI_QUEUE_LINE_MAX); MPI_Bcast(line, MPI_QUEUE_LINE_MAX, MPI_CHAR, 0, MPI_COMM_WORLD); } else { link_close(master); } for(i = 1; i < num_workers; i++) { struct mpi_queue_operation *op, close; memset(&close, 0, sizeof(close)); close.type = MPI_QUEUE_OP_EXIT; if(workers[i]) { if(workers[i]->status == MPI_QUEUE_JOB_BUSY) { MPI_Wait(&workers[i]->request, &workers[i]->mpi_status); op = list_peek_head(workers[i]->operations); if(op->output_length) { op->output_buffer = malloc(op->output_length); MPI_Recv(op->output_buffer, op->output_length, MPI_BYTE, workers[i]->worker_rank, 0, MPI_COMM_WORLD, &workers[i]->mpi_status); } } itable_remove(active_jobs, workers[i]->jobid); list_push_tail(complete_jobs, workers[i]); } MPI_Send(&close, sizeof(close), MPI_BYTE, i, 0, MPI_COMM_WORLD); } itable_firstkey(waiting_jobs); while(itable_size(waiting_jobs)) { struct mpi_queue_job *job; UINT64_T jobid; itable_nextkey(waiting_jobs, &jobid, (void **)&job); itable_remove(waiting_jobs, jobid); list_push_tail(complete_jobs, job); } while(list_size(complete_jobs)) { mpi_queue_job_delete(list_pop_head(complete_jobs)); } MPI_Finalize(); return abort_flag; }
// Test driver for the isum13pt stencil.
//
// Builds a test configuration `t` from the command line, fills it with
// generated data, then runs time steps 2 .. t->nt-1. A second, single-domain
// configuration `t_check` (created on every process without MPI, on the root
// node only with MPI) is advanced with the CPU stencil in lock-step and the
// two are compared after every step via test_write_imaxabsdiff().
//
// Build-time switches visible here: MPI (boundary exchange between
// subdomains), CUDA (GPU stencil path), VERBOSE (extra progress output),
// VISUALIZE / CUDA_MAPPED (explicit device-to-host copy of the result).
//
// Returns 0.
int main(int argc, char* argv[])
{
	// Parse test command line arguments, perform early
	// initializations.
	const char *name, *mode;
	int n, nt, sx, sy, ss, rank, szcomm;
#ifdef CUDA
	struct cudaDeviceProp props;
#endif
	test_parse(argc, argv, &name, &mode,
		&n, &nt, &sx, &sy, &ss, &rank, &szcomm
#ifdef CUDA
		, &props
#endif
		);

	// Select the compute path from the mode string. Without CUDA
	// support only the CPU path exists.
#ifdef CUDA
	int cpu = !strcmp(mode, "CPU");
	int gpu = !strcmp(mode, "GPU");
#else
	int cpu = 1;
	int gpu = 0;
#endif

	// Create test configuration.
	struct test_config_t* t = test_init(
		name, mode, n, nt, sx, sy, ss, rank, szcomm,
		xmin, ymin, zmin, xmax, ymax, zmax,
		bx, by, bs, ex, ey, es
#ifdef CUDA
		, &props
#endif
		);

	// Create another test configuration to check correctness.
	// It is initialized with 1, 1, 1, 0, 1 in place of
	// sx, sy, ss, rank, szcomm. With MPI it exists on the root
	// node only; on every other rank t_check stays NULL.
	struct test_config_t* t_check = NULL;
#ifdef MPI
	if (t->rank == MPI_ROOT_NODE)
#endif
	{
		t_check = test_init(
			name, mode, n, nt, 1, 1, 1, 0, 1,
			xmin, ymin, zmin, xmax, ymax, zmax,
			bx, by, bs, ex, ey, es
#ifdef CUDA
			, &props
#endif
			);
	}

	// Generate the initial data distribution and load it
	// onto compute nodes.
	// NOTE(review): the malloc result is not checked before use.
	integer* array = (integer*)malloc(t->cpu.parent->grid->extsize * sizeof(integer));
	genirand(t->cpu.parent->grid->extsize, array);
	test_load(t, n, sx, sy, ss, sizeof(integer), (char*)array);
#ifdef MPI
	if (t->rank == MPI_ROOT_NODE)
#endif
	{
		size_t nxysb = n * n * n * sizeof(integer);

		// Copy the data array.
		memcpy(t_check->cpu.arrays[0], array, nxysb);

		// Duplicate initial distribution to the second level array.
		memcpy(t_check->cpu.arrays[1], t_check->cpu.arrays[0], nxysb);
	}
	free(array);
#ifdef VERBOSE
	printf("step 0\n");
	printf("step 1\n");
#endif
	// The time iterations loop, CPU and GPU versions.
	// Starts at 2: arrays[0] and arrays[1] already hold the two
	// initial levels loaded above.
	for (int it = 2; it < t->nt; it++)
	{
		// Run one iteration of the stencil, measuring its time.
		// In case of MPI, the time of iteration is measured together
		// with the time of data sync.
		struct timespec start, stop;
#ifdef MPI
		if (t->rank == MPI_ROOT_NODE)
#endif
		{
			stenfw_get_time(&start);
		}
#ifdef MPI
		struct grid_domain_t* subdomains = t->cpu.subdomains;
		int nsubdomains = t->cpu.nsubdomains;

		// Copy the current iteration data into boundary slices
		// and compute stencil in them.
		// Boundary slices themselves are subdomains with respect
		// to each MPI decomposition domains.
		{
			// Set subdomain data copying callbacks:
			// use simple memcpy in this case.
			for (int i = 0; i < nsubdomains; i++)
			{
				struct grid_domain_t* sub = subdomains + i;
				sub->scatter_memcpy = &grid_subcpy;
				sub->gather_memcpy = &grid_subcpy;
			}

			// Scatter domain edges for separate computation.
			grid_scatter(subdomains, &t->cpu, 0, LAYOUT_MODE_CUSTOM);

			// Process edges subdomains.
			for (int i = 0; i < nsubdomains; i++)
			{
				struct grid_domain_t* sub = subdomains + i;

				// Full extents of the slice: leading margin +
				// interior + trailing margin in each dimension.
				int nx = sub->grid[0].bx + sub->grid[0].nx + sub->grid[0].ex;
				int ny = sub->grid[0].by + sub->grid[0].ny + sub->grid[0].ey;
				int ns = sub->grid[0].bs + sub->grid[0].ns + sub->grid[0].es;

				isum13pt_cpu(nx, ny, ns,
					(integer(*)[ny][nx])sub->arrays[0],
					(integer(*)[ny][nx])sub->arrays[1],
					(integer(*)[ny][nx])sub->arrays[2]);
			}
		}

		// Start sharing boundary slices between linked subdomains.
		// Two requests per subdomain: slot 2*i is the send,
		// slot 2*i+1 is the receive.
		// NOTE(review): the malloc result is not checked before use.
		MPI_Request* reqs = (MPI_Request*)malloc(sizeof(MPI_Request) * 2 * nsubdomains);
		for (int i = 0; i < nsubdomains; i++)
		{
			struct grid_domain_t* subdomain = subdomains + i;
			struct grid_domain_t* neighbor = *(subdomain->links.dense[0]);

			assert(neighbor->grid[1].extsize == subdomain->grid[0].extsize);

			int szelem = sizeof(integer);

			// The x extents below are scaled by the element size,
			// i.e. expressed in bytes; y and s extents are counts.
			size_t dnx = neighbor->grid[1].nx * szelem;
			size_t dny = neighbor->grid[1].ny;
			size_t dns = neighbor->grid[1].ns;

			size_t snx = subdomain->grid[0].nx * szelem;
			size_t sbx = subdomain->grid[0].bx * szelem;
			size_t sex = subdomain->grid[0].ex * szelem;

			size_t sny = subdomain->grid[0].ny, sns = subdomain->grid[0].ns;
			size_t sby = subdomain->grid[0].by, sbs = subdomain->grid[0].bs;
			size_t sey = subdomain->grid[0].ey, ses = subdomain->grid[0].es;

			// Offset of the first interior element past the
			// leading margins of the slice.
			size_t soffset = sbx + (sbx + snx + sex) *
				(sby + sbs * (sby + sny + sey));

			// Descriptor of the outgoing buffer: aliases arrays[1]
			// of the subdomain, which is what MPI_Isend transmits below.
			struct grid_domain_t obuf;
			memset(&obuf, 0, sizeof(struct grid_domain_t));
			obuf.arrays = subdomain->arrays + 1;
			obuf.narrays = 1;
			obuf.offset = 0;
			obuf.grid[0].nx = dnx;
			obuf.grid[0].ny = dny;
			obuf.grid[0].ns = dns;
			obuf.grid->size = dnx * dny * dns;

			// Descriptor of the source region: arrays[2] of the
			// subdomain, starting at soffset.
			struct grid_domain_t scpy = *subdomain;
			scpy.arrays = subdomain->arrays + 2;
			scpy.narrays = 1;
			scpy.offset = soffset;
			scpy.grid[0].nx = sbx + snx + sex;
			scpy.grid[0].ny = sby + sny + sey;
			scpy.grid[0].ns = sbs + sns + ses;

			// Copy data to the temporary buffer.
			grid_subcpy(dnx, dny, dns, &obuf, &scpy);

			// Exchange temporary buffers with the subdomain neighbour.
			int subdomain_rank = grid_rank1d(subdomain->parent->parent, subdomain->parent->grid);
			int neighbor_rank = grid_rank1d(neighbor->parent->parent, neighbor->parent->grid);
			MPI_SAFE_CALL(MPI_Isend(subdomain->arrays[1], obuf.grid->size, MPI_BYTE, neighbor_rank, 0, MPI_COMM_WORLD, &reqs[2 * i]));
			MPI_SAFE_CALL(MPI_Irecv(subdomain->arrays[0], obuf.grid->size, MPI_BYTE, neighbor_rank, 0, MPI_COMM_WORLD, &reqs[2 * i + 1]));
#ifdef VERBOSE
			printf("sharing: send %d->%d\n", subdomain_rank, neighbor_rank);
			printf("sharing: recv %d->%d\n", neighbor_rank, subdomain_rank);
#endif
		}
#endif // MPI

		// Compute inner grid points of the subdomain.
		int nx = t->cpu.grid->bx + t->cpu.grid->nx + t->cpu.grid->ex;
		int ny = t->cpu.grid->by + t->cpu.grid->ny + t->cpu.grid->ey;
		int ns = t->cpu.grid->bs + t->cpu.grid->ns + t->cpu.grid->es;

		if (cpu)
		{
			isum13pt_cpu(nx, ny, ns,
				(integer(*)[ny][nx])t->cpu.arrays[0],
				(integer(*)[ny][nx])t->cpu.arrays[1],
				(integer(*)[ny][nx])t->cpu.arrays[2]);
		}
#ifdef CUDA
		if (gpu)
		{
			isum13pt_gpu(nx, ny, ns,
				(integer*)t->gpu.arrays[0],
				(integer*)t->gpu.arrays[1],
				(integer*)t->gpu.arrays[2]);
#ifdef VISUALIZE
#ifndef CUDA_MAPPED
			// If GPU is not using mapped host memory, then need to fetch
			// the current iteration solution explicitly.
			// TODO: in case of MPI/CUDA/!MAPPED this copy must go AFTER
			// boundaries gathering.
			// NOTE(review): the copy is sized with sizeof(real) while
			// the arrays are handled as `integer` everywhere else in
			// this function - confirm both types have the same size.
			CUDA_SAFE_CALL(cudaMemcpy(t->cpu.arrays[2], t->gpu.arrays[2],
				t->gpu.grid->extsize * sizeof(real),
				cudaMemcpyDeviceToHost));
#endif // CUDA_MAPPED
#endif
		}
#endif // CUDA
#ifdef MPI
		// Wait for boundaries sharing completion.
		// NOTE(review): the malloc result is not checked before use.
		MPI_Status* statuses = (MPI_Status*)malloc(2 * nsubdomains * sizeof(MPI_Status));
		MPI_SAFE_CALL(MPI_Waitall(2 * nsubdomains, reqs, statuses));
		for (int i = 0; i < 2 * nsubdomains; i++)
			MPI_SAFE_CALL(statuses[i].MPI_ERROR);
		free(statuses);
		free(reqs);

		for (int i = 0; i < nsubdomains; i++)
		{
			struct grid_domain_t* subdomain = subdomains + i;

			int szelem = sizeof(integer);

			// As on the send side: x extents in bytes, y/s in counts.
			size_t dnx = subdomain->grid[1].nx * szelem;
			size_t dbx = subdomain->grid[1].bx * szelem;
			size_t dex = subdomain->grid[1].ex * szelem;

			size_t dny = subdomain->grid[1].ny, dns = subdomain->grid[1].ns;
			size_t dby = subdomain->grid[1].by, dbs = subdomain->grid[1].bs;
			size_t dey = subdomain->grid[1].ey, des = subdomain->grid[1].es;

			size_t doffset = dbx + (dbx + dnx + dex) *
				(dby + dbs * (dby + dny + dey));

			// Descriptor of the region in arrays[2] of the subdomain,
			// starting at doffset.
			struct grid_domain_t dcpy = *subdomain;
			dcpy.arrays = subdomain->arrays + 2;
			dcpy.narrays = 1;
			dcpy.offset = doffset;
			dcpy.grid[0].nx = dbx + dnx + dex;
			dcpy.grid[0].ny = dby + dny + dey;
			dcpy.grid[0].ns = dbs + dns + des;

			// Descriptor of the incoming buffer: aliases arrays[0]
			// of the subdomain, which MPI_Irecv filled above.
			struct grid_domain_t ibuf;
			memset(&ibuf, 0, sizeof(struct grid_domain_t));
			ibuf.arrays = subdomain->arrays;
			ibuf.narrays = 1;
			ibuf.offset = 0;
			ibuf.grid[0].nx = dnx;
			ibuf.grid[0].ny = dny;
			ibuf.grid[0].ns = dns;

			// Presumably copies the received buffer (ibuf) into the
			// subdomain array (dcpy), mirroring the argument order of
			// the send-side grid_subcpy call - confirm.
			grid_subcpy(dnx, dny, dns, &dcpy, &ibuf);

			// Swap pointers to make the last iteration in the bottom.
			char* w = subdomain->arrays[0];
			subdomain->arrays[0] = subdomain->arrays[2];
			subdomain->arrays[2] = w;
		}

		// Gather boundaries for the next time step. Insert the
		// separately computed boundaries back into the subdomains
		// for the next time step.
		struct grid_domain_t target = t->cpu;
		target.narrays = 1;
		target.arrays = t->cpu.arrays + 2;
		grid_gather(&target, subdomains, 1, LAYOUT_MODE_CUSTOM);

		// With MPI, non-root ranks only report progress (when
		// VERBOSE); the root falls through to the `else` branch
		// below, which is the unconditional block in non-MPI builds.
		if (t->rank != MPI_ROOT_NODE)
		{
#ifdef VERBOSE
			printf("step %d\n", it);
#endif
		}
		else
#endif // MPI
		{
			stenfw_get_time(&stop);
			printf("step %d time = ", it);
			stenfw_print_time_diff(start, stop);
			printf(" sec\n");
		}
#ifdef MPI
		if (t->rank == MPI_ROOT_NODE)
#endif
		{
			// Compute inner grid points of the control solution subdomain.
			int nx = t_check->cpu.grid->bx + t_check->cpu.grid->nx + t_check->cpu.grid->ex;
			int ny = t_check->cpu.grid->by + t_check->cpu.grid->ny + t_check->cpu.grid->ey;
			int ns = t_check->cpu.grid->bs + t_check->cpu.grid->ns + t_check->cpu.grid->es;

			isum13pt_cpu(nx, ny, ns,
				(integer(*)[ny][nx])t_check->cpu.arrays[0],
				(integer(*)[ny][nx])t_check->cpu.arrays[1],
				(integer(*)[ny][nx])t_check->cpu.arrays[2]);
		}

		// Print the stats of difference between the solution and
		// the control solution.
		// NOTE(review): with MPI this is called on every rank, and
		// t_check is NULL on non-root ranks - presumably the callee
		// handles that; confirm.
		test_write_imaxabsdiff(t, t_check, 2, it);

		// Swap pointers to rewrite the oldest iteration with
		// the next one.
		char* w = t->cpu.arrays[0];
		t->cpu.arrays[0] = t->cpu.arrays[1];
		t->cpu.arrays[1] = t->cpu.arrays[2];
		t->cpu.arrays[2] = w;
#ifdef CUDA
		if (gpu)
		{
			// Also swap the corresponding GPU arrays pointers.
			w = t->gpu.arrays[0];
			t->gpu.arrays[0] = t->gpu.arrays[1];
			t->gpu.arrays[1] = t->gpu.arrays[2];
			t->gpu.arrays[2] = w;
		}
#endif
#ifdef MPI
		if (t->rank == MPI_ROOT_NODE)
#endif
		{
			// Swap pointers to rewrite the oldest control solution
			// iteration with the next one.
			char* w = t_check->cpu.arrays[0];
			t_check->cpu.arrays[0] = t_check->cpu.arrays[1];
			t_check->cpu.arrays[1] = t_check->cpu.arrays[2];
			t_check->cpu.arrays[2] = w;
		}
	}

	// Dispose the test configurations.
#ifdef MPI
	if (t->rank == MPI_ROOT_NODE)
#endif
	{
		test_dispose(t_check);
	}
	test_dispose(t);

	return 0;
}
/*@

MPI_Intercomm_create - Creates an intercommunicator from two intracommunicators

Input Parameters:
+ local_comm - Local (intra)communicator
. local_leader - Rank in local_comm of leader (often 0)
. peer_comm - Remote communicator
. remote_leader - Rank in peer_comm of remote leader (often 0)
- tag - Message tag to use in constructing intercommunicator; if multiple
  'MPI_Intercomm_creates' are being made, they should use different tags (more
  precisely, ensure that the local and remote leaders are using different
  tags for each 'MPI_intercomm_create').

Output Parameter:
. comm_out - Created intercommunicator

Notes:
  The MPI 1.1 Standard contains two mutually exclusive comments on the
  input intracommunicators.  One says that their respective groups must be
  disjoint; the other that the leaders can be the same process.  After
  some discussion by the MPI Forum, it has been decided that the groups must
  be disjoint.  Note that the `reason` given for this in the standard is
  `not` the reason for this choice; rather, the `other` operations on
  intercommunicators (like 'MPI_Intercomm_merge') do not make sense if the
  groups are not disjoint.

.N fortran

Algorithm:
+ 1) Allocate a send context, an inter-coll context, and an intra-coll context
. 2) Send "send_context" and lrank_to_grank list from local comm group
     if I''m the local_leader.
. 3) If I''m the local leader, then wait on the posted sends and receives
     to complete.  Post the receive for the remote group information and
     wait for it to complete.
. 4) Broadcast information received from the remote leader.
. 5) Create the inter_communicator from the information we now have.
-    An inter-communicator ends up with three levels of communicators.
     The inter-communicator returned to the user, a "collective"
     inter-communicator that can be used for safe communications between
     local & remote groups, and a collective intra-communicator that can
     be used to allocate new contexts during the merge and dup operations.

     For the resulting inter-communicator, 'comm_out'

.vb
       comm_out                       = inter-communicator
       comm_out->comm_coll            = "collective" inter-communicator
       comm_out->comm_coll->comm_coll = safe collective intra-communicator
.ve

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_TAG
.N MPI_ERR_EXHAUSTED
.N MPI_ERR_RANK

.seealso: MPI_Intercomm_merge, MPI_Comm_free, MPI_Comm_remote_group,
          MPI_Comm_remote_size
@*/
EXPORT_MPI_API int MPI_Intercomm_create ( MPI_Comm local_comm, int local_leader,
			   MPI_Comm peer_comm, int remote_leader, int tag,
			   MPI_Comm *comm_out )
{
  int              local_size, local_rank, peer_size, peer_rank;
  int              remote_size;
  int              mpi_errno = MPI_SUCCESS;
  MPIR_CONTEXT     context, send_context;
  struct MPIR_GROUP *remote_group_ptr;
  /* peer_comm_ptr is assigned and used only on the local leader */
  struct MPIR_COMMUNICATOR *new_comm, *local_comm_ptr, *peer_comm_ptr;
  /* req[0..3] carry the size/context exchange (first Waitall of 4);
     req[4..5] carry the group-table exchange (second Waitall of 2) */
  MPI_Request      req[6];
  MPI_Status       status[6];
  MPIR_ERROR_DECL;
  static char myname[]="MPI_INTERCOMM_CREATE";

  TR_PUSH(myname);
  local_comm_ptr = MPIR_GET_COMM_PTR(local_comm);

#ifndef MPIR_NO_ERROR_CHECKING
  /* Check for valid arguments to function */
  /* NOTE(review): the test macros presumably record a failure in
     mpi_errno, which is inspected right after them - confirm against
     the macro definitions */
  MPIR_TEST_MPI_COMM(local_comm,local_comm_ptr,local_comm_ptr,myname);
  MPIR_TEST_SEND_TAG(tag);
  if (mpi_errno)
      return MPIR_ERROR(local_comm_ptr, mpi_errno, myname );
#endif

  if (local_comm  == MPI_COMM_NULL) {
      mpi_errno = MPIR_Err_setmsg( MPI_ERR_COMM, MPIR_ERR_LOCAL_COMM, myname,
		   "Local communicator must not be MPI_COMM_NULL", (char *)0 );
      return MPIR_ERROR( local_comm_ptr, mpi_errno, myname );
  }

  (void) MPIR_Comm_size ( local_comm_ptr, &local_size );
  (void) MPIR_Comm_rank ( local_comm_ptr, &local_rank );

  if ( local_leader == local_rank ) {
      /* Peer_comm need be valid only at local_leader */
      peer_comm_ptr = MPIR_GET_COMM_PTR(peer_comm);
      if ((MPIR_TEST_COMM_NOTOK(peer_comm,peer_comm_ptr) ||
	   (peer_comm == MPI_COMM_NULL))) {
	  mpi_errno = MPIR_Err_setmsg( MPI_ERR_COMM, MPIR_ERR_PEER_COMM,
			       myname, "Peer communicator is not valid",
				       (char *)0 );
	  return MPIR_ERROR( local_comm_ptr, mpi_errno, myname );
      }

      (void) MPIR_Comm_size ( peer_comm_ptr,  &peer_size  );
      (void) MPIR_Comm_rank ( peer_comm_ptr,  &peer_rank  );

      /* The "&& (mpi_errno = ...)" form assigns the error class as a
	 side effect of the test; the assigned value is nonzero, so the
	 condition is decided by the comparison alone */
      if (((peer_rank     == MPI_UNDEFINED) && (mpi_errno = MPI_ERR_RANK)))
	  return MPIR_ERROR( local_comm_ptr, mpi_errno, myname );

      if (((remote_leader >= peer_size)     && (mpi_errno = MPI_ERR_RANK)) ||
	  ((remote_leader <  0)             && (mpi_errno = MPI_ERR_RANK))) {
	  mpi_errno = MPIR_Err_setmsg( MPI_ERR_RANK, MPIR_ERR_REMOTE_RANK,
				       myname,
				       "Error specifying remote_leader",
"Error specifying remote_leader; value %d not between 0 and %d", remote_leader, peer_size );
	  return MPIR_ERROR( local_comm_ptr, mpi_errno, myname );
      }
  }

  /* local_leader is range-checked on every process, after the
     leader-only peer checks above */
  if (((local_leader  >= local_size)    && (mpi_errno = MPI_ERR_RANK)) ||
      ((local_leader  <  0)             && (mpi_errno = MPI_ERR_RANK))) {
      mpi_errno = MPIR_Err_setmsg( MPI_ERR_RANK, MPIR_ERR_LOCAL_RANK,
				   myname,
				   "Error specifying local_leader",
"Error specifying local_leader; value %d not in between 0 and %d", local_leader, local_size );
      return MPIR_ERROR( local_comm_ptr, mpi_errno, myname );
  }

  /* Allocate send context, inter-coll context and intra-coll context */
  MPIR_Context_alloc ( local_comm_ptr, 3, &context );

  /* If I'm the local leader, then exchange information */
  if (local_rank == local_leader) {
      MPIR_ERROR_PUSH(peer_comm_ptr);

      /* Post the receives for the information from the remote_leader */
      /* We don't post a receive for the remote group yet, because we */
      /* don't know how big it is yet. */
      MPIR_CALL_POP(MPI_Irecv (&remote_size, 1, MPI_INT, remote_leader, tag,
			       peer_comm, &(req[2])),peer_comm_ptr,myname);
      MPIR_CALL_POP(MPI_Irecv (&send_context, 1, MPIR_CONTEXT_TYPE,
			       remote_leader,tag, peer_comm, &(req[3])),
		    peer_comm_ptr,myname);

      /* Send the lrank_to_grank table of the local_comm and an allocated */
      /* context. Currently I use multiple messages to send this info.    */
      /* Eventually, this will change(?) */
      MPIR_CALL_POP(MPI_Isend (&local_size, 1, MPI_INT, remote_leader, tag,
			       peer_comm, &(req[0])),peer_comm_ptr,myname);
      MPIR_CALL_POP(MPI_Isend (&context, 1, MPIR_CONTEXT_TYPE, remote_leader,
			       tag, peer_comm, &(req[1])),peer_comm_ptr,myname);

      /* Wait on the communication requests to finish */
      MPIR_CALL_POP(MPI_Waitall ( 4, req, status ),peer_comm_ptr,myname);

      /* We now know how big the remote group is, so create it */
      remote_group_ptr = MPIR_CreateGroup ( remote_size );
      remote_group_ptr->self =
	  (MPI_Group) MPIR_FromPointer( remote_group_ptr );

      /* Post the receive for the group information */
      MPIR_CALL_POP(MPI_Irecv (remote_group_ptr->lrank_to_grank, remote_size,
			       MPI_INT, remote_leader, tag, peer_comm,
			       &(req[5])),peer_comm_ptr,myname);

      /* Send the local group info to the remote group */
      MPIR_CALL_POP(MPI_Isend (local_comm_ptr->group->lrank_to_grank,
			       local_size, MPI_INT, remote_leader, tag,
			       peer_comm, &(req[4])),peer_comm_ptr,myname);

      /* wait on the send and the receive for the group information */
      MPIR_CALL_POP(MPI_Waitall ( 2, &(req[4]), &(status[4]) ),peer_comm_ptr,
		    myname);
      MPIR_ERROR_POP(peer_comm_ptr);

      /* Now we can broadcast the group information to the other local comm */
      /* members. */
      /* Here local_rank == local_leader, so the root used below matches
	 the local_leader root used by the non-leader branch */
      MPIR_ERROR_PUSH(local_comm_ptr);
      MPIR_CALL_POP(MPI_Bcast(&remote_size,1,MPI_INT,local_rank,local_comm),
		    local_comm_ptr,myname);
      MPIR_CALL_POP(MPI_Bcast(remote_group_ptr->lrank_to_grank, remote_size,
			      MPI_INT, local_rank, local_comm),local_comm_ptr,
		    myname);
      MPIR_ERROR_POP(local_comm_ptr);
  }
  /* Else I'm just an ordinary comm member, so receive the bcast'd */
  /* info about the remote group */
  else {
      MPIR_ERROR_PUSH(local_comm_ptr);
      MPIR_CALL_POP(MPI_Bcast(&remote_size, 1, MPI_INT, local_leader,
			      local_comm),local_comm_ptr,myname);

      /* We now know how big the remote group is, so create it */
      remote_group_ptr = MPIR_CreateGroup ( remote_size );
      remote_group_ptr->self =
	  (MPI_Group) MPIR_FromPointer( remote_group_ptr );

      /* Receive the group info */
      MPIR_CALL_POP(MPI_Bcast(remote_group_ptr->lrank_to_grank, remote_size,
			      MPI_INT, local_leader, local_comm),
		    local_comm_ptr,myname );
      MPIR_ERROR_POP(local_comm_ptr);
  }

  MPIR_ERROR_PUSH(local_comm_ptr);
  /* Broadcast the send context */
  MPIR_CALL_POP(MPI_Bcast(&send_context, 1, MPIR_CONTEXT_TYPE,
			  local_leader, local_comm),local_comm_ptr,myname);
  MPIR_ERROR_POP(local_comm_ptr);

  /* We all now have all the information necessary, start building the */
  /* inter-communicator */
  MPIR_ALLOC(new_comm,NEW(struct MPIR_COMMUNICATOR),local_comm_ptr,
	     MPI_ERR_EXHAUSTED,myname );
  MPIR_Comm_init( new_comm, local_comm_ptr, MPIR_INTER );
  *comm_out = new_comm->self;

  /* The communicator's group is the remote group; the local group is a
     duplicate of local_comm's group */
  new_comm->group = remote_group_ptr;
  MPIR_Group_dup( local_comm_ptr->group, &(new_comm->local_group) );
  new_comm->local_rank	   = new_comm->local_group->local_rank;
  new_comm->lrank_to_grank = new_comm->group->lrank_to_grank;
  new_comm->np             = new_comm->group->np;
  /* Send with the context received from the remote side, receive with
     the context allocated locally */
  new_comm->send_context   = send_context;
  new_comm->recv_context   = context;
  new_comm->comm_name      = 0;
  /* NOTE(review): this early return bypasses TR_POP and leaves
     *comm_out already assigned - confirm that is intended */
  if ((mpi_errno = MPID_CommInit( local_comm_ptr, new_comm )) )
      return mpi_errno;
  (void) MPIR_Attr_create_tree ( new_comm );

  /* Build the collective inter-communicator */
  MPIR_Comm_make_coll( new_comm, MPIR_INTER );
  MPIR_Comm_make_onesided( new_comm, MPIR_INTER );

  /* Build the collective intra-communicator.  Note that we require
     an intra-communicator for the "coll_comm" so that MPI_COMM_DUP
     can use it for some collective operations (do we need this
     for MPI-2 with intercommunicator collective?)

     Note that this really isn't the right thing to do; we need to replace
     *all* of the Mississippi state collective code.
   */
  MPIR_Comm_make_coll( new_comm->comm_coll, MPIR_INTRA );
#if 0
  MPIR_Comm_make_coll( new_comm->comm_onesided, MPIR_INTRA );
#endif

  /* Remember it for the debugger */
  MPIR_Comm_remember ( new_comm );

  TR_POP;
  return (mpi_errno);
}
void compute(int rank, int size) { double *row1a = (double *) malloc(sizeof(double) * size_x); double *row1b = (double *) malloc(sizeof(double) * size_x); memset(row1a, 0, sizeof(double *) * size_x); double *row2a = (double *) malloc(sizeof(double) * size_x); double *row2b = (double *) malloc(sizeof(double) * size_x); memset(row2a, 0, sizeof(double *) * size_x); int position = id_to_position(size_y, size, rank); int height = id_to_size(size_y, size, rank); MPI_Request req[2]; MPI_Request req2[2]; DoubleMatrix matrix(size_x, height); set_fixed_temp(matrix, size_y, position, temp); matrix.swap(); compute_new_values(matrix, row1a, row2a); set_fixed_temp(matrix, size_y, position, temp); matrix.swap(); for (int i = 1; i < iterations; i++) { MPI_Isend(row1a, size_x, MPI_DOUBLE, (rank + size - 1) % size, TAG_ROW1, MPI_COMM_WORLD, &req[0]); MPI_Isend(row2a, size_x, MPI_DOUBLE, (rank + 1) % size, TAG_ROW2, MPI_COMM_WORLD, &req[1]); MPI_Irecv(row1b, size_x, MPI_DOUBLE, (rank + size - 1) % size, TAG_ROW2, MPI_COMM_WORLD, &req2[0]); MPI_Irecv(row2b, size_x, MPI_DOUBLE, (rank + 1) % size, TAG_ROW1, MPI_COMM_WORLD, &req2[1]); MPI_Waitall(2, req2, MPI_STATUSES_IGNORE); double *tmp; tmp = row1a; // swap row1a <-> row1b row1a = row1b; row1b = tmp; tmp = row2a; // swap row2a <-> row2b row2a = row2b; row2b = tmp; compute_new_values(matrix, row1a, row2a); set_fixed_temp(matrix, size_y, position, temp); matrix.swap(); MPI_Waitall(2, req, MPI_STATUSES_IGNORE); } free(row1a); free(row1b); free(row2a); free(row2b); if (rank == 0) { DoubleMatrix out(size_x, size_y); out.set_data(matrix.get_data(), size_x * position, matrix.get_data_size()); for (int rank = 1; rank < size; rank++) { int position = id_to_position(size_y, size, rank); int height = id_to_size(size_y, size, rank); MPI_Recv(out.get_write_pointer(0, position), size_x * height, MPI_DOUBLE, rank, TAG_MATRIX, MPI_COMM_WORLD, MPI_STATUSES_IGNORE); } out.swap(); out.write_to_file("result2.html"); } else { MPI_Send(matrix.get_data(), 
matrix.get_size_x() * matrix.get_size_y(), MPI_DOUBLE, 0, TAG_MATRIX, MPI_COMM_WORLD); } }
va_list ap; va_start(ap, unknown); buf = unknown; if (_numargs() == NUMPARAMS+1) { buflen = va_arg(ap, int) /8; /* This is in bits. */ } count = va_arg (ap, int *); datatype = va_arg(ap, MPI_Datatype*); source = va_arg(ap, int *); tag = va_arg(ap, int *); comm = va_arg(ap, MPI_Comm*); request = va_arg(ap, MPI_Request *); __ierr = va_arg(ap, int *); *__ierr = MPI_Irecv(MPIR_F_PTR(buf),*count,*datatype,*source,*tag,*comm, &lrequest); *(int*)request = MPI_Request_c2f(lrequest); } #else void mpi_irecv_( buf, count, datatype, source, tag, comm, request, __ierr ) void *buf; int*count; MPI_Datatype * datatype; int*source; int*tag; MPI_Comm *comm; MPI_Request *request; int *__ierr; { MPI_Request lrequest;
void Grid3D::Load_and_Send_MPI_Comm_Buffers_BLOCK(int dir, int *flags) { int i, j, k, ii; int gidx; int idx; int offset; int ireq; ireq = 0; /* x boundaries */ if(dir == 0) { if (flags[0]==5) { // load left x communication buffer // 1D if (H.ny == 1 && H.nz == 1) { offset = H.n_ghost; for (i=0;i<H.n_ghost;i++) { idx = (i+H.n_ghost); gidx = i; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_x0 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } // 2D if (H.ny > 1 && H.nz == 1) { offset = H.n_ghost*(H.ny-2*H.n_ghost); for (i=0;i<H.n_ghost;i++) { for (j=0;j<H.ny-2*H.n_ghost;j++) { idx = (i+H.n_ghost) + (j+H.n_ghost)*H.nx; gidx = i + j*H.n_ghost; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_x0 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } } // 3D if (H.ny > 1 && H.nz > 1) { offset = H.n_ghost*(H.ny-2*H.n_ghost)*(H.nz-2*H.n_ghost); for(i=0;i<H.n_ghost;i++) { for(j=0;j<H.ny-2*H.n_ghost;j++) { for(k=0;k<H.nz-2*H.n_ghost;k++) { idx = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; gidx = i + j*H.n_ghost + k*H.n_ghost*(H.ny-2*H.n_ghost); for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_x0 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } } } //post non-blocking receive left x communication buffer MPI_Irecv(recv_buffer_x0, x_buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request[ireq]); //non-blocking send left x communication buffer MPI_Isend(send_buffer_x0, x_buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request[0]); //keep track of how many sends and receives are expected ireq++; } if(flags[1]==5) { // load right x communication buffer // 1D if (H.ny == 1 && H.nz == 1) { offset = H.n_ghost; for (i=0;i<H.n_ghost;i++) { idx = (i+H.nx-2*H.n_ghost); gidx = i; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_x1 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } // 2D if (H.ny > 1 && H.nz == 1) { offset = H.n_ghost*(H.ny-2*H.n_ghost); for (i=0;i<H.n_ghost;i++) { for (j=0;j<H.ny-2*H.n_ghost;j++) { idx = 
(i+H.nx-2*H.n_ghost) + (j+H.n_ghost)*H.nx; gidx = i + j*H.n_ghost; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_x1 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } } // 3D if (H.ny > 1 && H.nz > 1) { offset = H.n_ghost*(H.ny-2*H.n_ghost)*(H.nz-2*H.n_ghost); for(i=0;i<H.n_ghost;i++) { for(j=0;j<H.ny-2*H.n_ghost;j++) { for(k=0;k<H.nz-2*H.n_ghost;k++) { idx = (i+H.nx-2*H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; gidx = i + j*H.n_ghost + k*H.n_ghost*(H.ny-2*H.n_ghost); for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_x1 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } } } //post non-blocking receive right x communication buffer MPI_Irecv(recv_buffer_x1, x_buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request[ireq]); //non-blocking send right x communication buffer MPI_Isend(send_buffer_x1, x_buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request[1]); //keep track of how many sends and receives are expected ireq++; } } /* y boundaries */ if (dir==1) { // load left y communication buffer if(flags[2] == 5) { // 2D if (H.nz == 1) { offset = H.n_ghost*H.nx; for (i=0;i<H.nx;i++) { for (j=0;j<H.n_ghost;j++) { idx = i + (j+H.n_ghost)*H.nx; gidx = i + j*H.nx; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_y0 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } } // 3D if (H.nz > 1) { offset = H.n_ghost*H.nx*(H.nz-2*H.n_ghost); for(i=0;i<H.nx;i++) { for(j=0;j<H.n_ghost;j++) { for(k=0;k<H.nz-2*H.n_ghost;k++) { idx = i + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; gidx = i + j*H.nx + k*H.nx*H.n_ghost; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_y0 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } } } //post non-blocking receive left y communication buffer MPI_Irecv(recv_buffer_y0, y_buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request[ireq]); //non-blocking send left y communication buffer MPI_Isend(send_buffer_y0, y_buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request[0]); //keep track of how 
many sends and receives are expected ireq++; } // load right y communication buffer if(flags[3]==5) { // 2D if (H.nz == 1) { offset = H.n_ghost*H.nx; for (i=0;i<H.nx;i++) { for (j=0;j<H.n_ghost;j++) { idx = i + (j+H.ny-2*H.n_ghost)*H.nx; gidx = i + j*H.nx; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_y1 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } } // 3D if (H.nz > 1) { offset = H.n_ghost*H.nx*(H.nz-2*H.n_ghost); for(i=0;i<H.nx;i++) { for(j=0;j<H.n_ghost;j++) { for(k=0;k<H.nz-2*H.n_ghost;k++) { idx = i + (j+H.ny-2*H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; gidx = i + j*H.nx + k*H.nx*H.n_ghost; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_y1 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } } } //post non-blocking receive right y communication buffer MPI_Irecv(recv_buffer_y1, y_buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request[ireq]); //non-blocking send right y communication buffer MPI_Isend(send_buffer_y1, y_buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request[1]); //keep track of how many sends and receives are expected ireq++; } } /* z boundaries */ if (dir==2) { // left z communication buffer if(flags[4]==5) { // 3D offset = H.n_ghost*H.nx*H.ny; for(i=0;i<H.nx;i++) { for(j=0;j<H.ny;j++) { for(k=0;k<H.n_ghost;k++) { idx = i + j*H.nx + (k+H.n_ghost)*H.nx*H.ny; gidx = i + j*H.nx + k*H.nx*H.ny; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_z0 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } } //post non-blocking receive left z communication buffer MPI_Irecv(recv_buffer_z0, z_buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request[ireq]); //non-blocking send left z communication buffer MPI_Isend(send_buffer_z0, z_buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request[0]); //keep track of how many sends and receives are expected ireq++; } // load right z communication buffer if(flags[5]==5) { offset = H.n_ghost*H.nx*H.ny; for(i=0;i<H.nx;i++) { for(j=0;j<H.ny;j++) { for(k=0;k<H.n_ghost;k++) { idx 
= i + j*H.nx + (k+H.nz-2*H.n_ghost)*H.nx*H.ny; gidx = i + j*H.nx + k*H.nx*H.ny; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_z1 + gidx + ii*offset) = C.density[idx + ii*H.n_cells]; } } } } //post non-blocking receive right x communication buffer MPI_Irecv(recv_buffer_z1, z_buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request[ireq]); //non-blocking send right x communication buffer MPI_Isend(send_buffer_z1, z_buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request[1]); //keep track of how many sends and receives are expected ireq++; } } }
void Grid3D::Load_and_Send_MPI_Comm_Buffers_SLAB(int *flags) { int i, j, k, ii; int gidx; int idx; int ireq = 0; int offset = H.n_ghost*H.ny*H.nz; /*check left side*/ if(flags[0]==5) { //load left x communication buffer for(i=0;i<H.n_ghost;i++) { for(j=0;j<H.ny;j++) { for(k=0;k<H.nz;k++) { idx = (i+H.n_ghost) + j*H.nx + k*H.nx*H.ny; gidx = i + j*H.n_ghost + k*H.n_ghost*H.ny; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_0 + gidx + ii*offset) = C.density[ii*H.n_cells + idx]; } } } } //post non-blocking receive left x communication buffer MPI_Irecv(recv_buffer_0, recv_buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request[ireq]); //non-blocking send left x communication buffer MPI_Isend(send_buffer_0, send_buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request[0]); //remember how many recv's this proc expects ireq++; } /*check right side*/ if(flags[1]==5) { //load right x communication buffer for(i=0;i<H.n_ghost;i++) { for(j=0;j<H.ny;j++) { for(k=0;k<H.nz;k++) { idx = (i+H.nx-2*H.n_ghost) + j*H.nx + k*H.nx*H.ny; gidx = i + j*H.n_ghost + k*H.n_ghost*H.ny; for (ii=0; ii<H.n_fields; ii++) { *(send_buffer_1 + gidx + ii*offset) = C.density[ii*H.n_cells + idx]; } } } } //post non-blocking receive right x communication buffer MPI_Irecv(recv_buffer_1, recv_buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request[ireq]); //non-blocking send right x communication buffer MPI_Isend(send_buffer_1, send_buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request[1]); //remember how many recv's this proc expects ireq++; } //done! }
/* Sets error_code to MPI_SUCCESS if successful, or creates an error code * in the case of error. */ static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, const void *buf, char *write_buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, int *count, int *start_pos, int *sent_to_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, int *striping_info, ADIOI_Access *others_req, int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int *hole, int iter, MPI_Aint buftype_extent, int *buf_idx, ADIO_Offset **srt_off, int **srt_len, int *srt_num, int *error_code) { int i, j, nprocs_recv, nprocs_send, err; char **send_buf = NULL; MPI_Request *requests, *send_req; MPI_Datatype *recv_types; MPI_Status *statuses, status; int sum_recv; int data_sieving = *hole; static char myname[] = "ADIOI_W_EXCHANGE_DATA"; /* create derived datatypes for recv */ nprocs_recv = 0; for (i = 0; i < nprocs; i++) if (recv_size[i]) nprocs_recv++; recv_types = (MPI_Datatype *) ADIOI_Malloc((nprocs_recv + 1) * sizeof(MPI_Datatype)); /* +1 to avoid a 0-size malloc */ j = 0; for (i = 0; i < nprocs; i++) { if (recv_size[i]) { ADIOI_Type_create_hindexed_x(count[i], &(others_req[i].lens[start_pos[i]]), &(others_req[i].mem_ptrs[start_pos[i]]), MPI_BYTE, recv_types + j); /* absolute displacements; use MPI_BOTTOM in recv */ MPI_Type_commit(recv_types + j); j++; } } /* To avoid a read-modify-write, * check if there are holes in the data to be written. * For this, merge the (sorted) offset lists others_req using a heap-merge. 
*/ *srt_num = 0; for (i = 0; i < nprocs; i++) *srt_num += count[i]; if (*srt_off) *srt_off = (ADIO_Offset *) ADIOI_Realloc(*srt_off, (*srt_num + 1) * sizeof(ADIO_Offset)); else *srt_off = (ADIO_Offset *) ADIOI_Malloc((*srt_num + 1) * sizeof(ADIO_Offset)); if (*srt_len) *srt_len = (int *) ADIOI_Realloc(*srt_len, (*srt_num + 1) * sizeof(int)); else *srt_len = (int *) ADIOI_Malloc((*srt_num + 1) * sizeof(int)); /* +1 to avoid a 0-size malloc */ ADIOI_Heap_merge(others_req, count, *srt_off, *srt_len, start_pos, nprocs, nprocs_recv, *srt_num); /* check if there are any holes */ *hole = 0; for (i = 0; i < *srt_num - 1; i++) { if ((*srt_off)[i] + (*srt_len)[i] < (*srt_off)[i + 1]) { *hole = 1; break; } } /* In some cases (see John Bent ROMIO REQ # 835), an odd interaction * between aggregation, nominally contiguous regions, and cb_buffer_size * should be handled with a read-modify-write (otherwise we will write out * more data than we receive from everyone else (inclusive), so override * hole detection */ if (*hole == 0) { sum_recv = 0; for (i = 0; i < nprocs; i++) sum_recv += recv_size[i]; if (size > sum_recv) *hole = 1; } /* check the hint for data sieving */ if (data_sieving == ADIOI_HINT_ENABLE && nprocs_recv && *hole) { ADIO_ReadContig(fd, write_buf, size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, off, &status, &err); // --BEGIN ERROR HANDLING-- if (err != MPI_SUCCESS) { *error_code = MPIO_Err_create_code(err, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**ioRMWrdwr", 0); ADIOI_Free(recv_types); return; } // --END ERROR HANDLING-- } nprocs_send = 0; for (i = 0; i < nprocs; i++) if (send_size[i]) nprocs_send++; if (fd->atomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ requests = (MPI_Request *) ADIOI_Malloc((nprocs_send + 1) * sizeof(MPI_Request)); send_req = requests; } else { requests = (MPI_Request *) ADIOI_Malloc((nprocs_send + nprocs_recv + 1)* sizeof(MPI_Request)); /* +1 to avoid a 0-size malloc */ /* post receives */ j = 0; for (i = 0; i < nprocs; 
i++) { if (recv_size[i]) { MPI_Irecv(MPI_BOTTOM, 1, recv_types[j], i, myrank + i + 100 * iter, fd->comm, requests + j); j++; } } send_req = requests + nprocs_recv; } /* post sends. * if buftype_is_contig, data can be directly sent from * user buf at location given by buf_idx. else use send_buf. */ if (buftype_is_contig) { j = 0; for (i = 0; i < nprocs; i++) if (send_size[i]) { ADIOI_Assert(buf_idx[i] != -1); MPI_Isend(((char *) buf) + buf_idx[i], send_size[i], MPI_BYTE, i, myrank + i + 100 * iter, fd->comm, send_req + j); j++; } } else if (nprocs_send) { /* buftype is not contig */ send_buf = (char **) ADIOI_Malloc(nprocs * sizeof(char *)); for (i = 0; i < nprocs; i++) if (send_size[i]) send_buf[i] = (char *) ADIOI_Malloc(send_size[i]); ADIOI_LUSTRE_Fill_send_buffer(fd, buf, flat_buf, send_buf, offset_list, len_list, send_size, send_req, sent_to_proc, nprocs, myrank, contig_access_count, striping_info, send_buf_idx, curr_to_proc, done_to_proc, iter, buftype_extent); /* the send is done in ADIOI_Fill_send_buffer */ } /* bug fix from Wei-keng Liao and Kenin Coloma */ if (fd->atomicity) { j = 0; for (i = 0; i < nprocs; i++) { MPI_Status wkl_status; if (recv_size[i]) { MPI_Recv(MPI_BOTTOM, 1, recv_types[j], i, myrank + i + 100 * iter, fd->comm, &wkl_status); j++; } } } for (i = 0; i < nprocs_recv; i++) MPI_Type_free(recv_types + i); ADIOI_Free(recv_types); /* bug fix from Wei-keng Liao and Kenin Coloma */ /* +1 to avoid a 0-size malloc */ if (fd->atomicity) { statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send + 1) * sizeof(MPI_Status)); } else { statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send + nprocs_recv + 1) * sizeof(MPI_Status)); } #ifdef NEEDS_MPI_TEST i = 0; if (fd->atomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ while (!i) MPI_Testall(nprocs_send, send_req, &i, statuses); } else { while (!i) MPI_Testall(nprocs_send + nprocs_recv, requests, &i, statuses); } #else /* bug fix from Wei-keng Liao and Kenin Coloma */ if (fd->atomicity) 
MPI_Waitall(nprocs_send, send_req, statuses); else MPI_Waitall(nprocs_send + nprocs_recv, requests, statuses); #endif ADIOI_Free(statuses); ADIOI_Free(requests); if (!buftype_is_contig && nprocs_send) { for (i = 0; i < nprocs; i++) if (send_size[i]) ADIOI_Free(send_buf[i]); ADIOI_Free(send_buf); } }
int main (int argc, char *argv[]) { void inidat(); float ***array; /* array for grid */ int taskid, /* this task's unique id */ numtasks, /* number of tasks */ averow,rows,offset,extra, /* for sending rows of data */ dest, source, /* to - from for message send-receive */ left,right, /* neighbor tasks */ msgtype, /* for message types */ rc,start,end, /* misc */ i,x,y,z,it,size,t_sqrt; /* loop variables */ MPI_Status status; MPI_Datatype dt,dt2; MPI_Request req, req2,req3,req4,req5; double t1,t2; /* First, find out my taskid and how many tasks are running */ MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&numtasks); MPI_Comm_rank(MPI_COMM_WORLD,&taskid); /*Set number of threads */ omp_set_num_threads(atoi(argv[1])); // Use n threads for all consecutive parallel regions omp_set_nested(1); if (taskid == 0) { //printf("Grid size: X= %d Y= %d Time steps= %d\n",NXPROB,NYPROB,STEPS); t1 = MPI_Wtime(); } i = 0; while(i*i < (NXPROB*NYPROB)/numtasks) i++; size = i; i = 0; while(i*i<numtasks) i++; t_sqrt = i; MPI_Type_contiguous(size+2,MPI_FLOAT, &dt); MPI_Type_commit(&dt); MPI_Type_vector(size+2,1,size+2,MPI_FLOAT,&dt2); MPI_Type_commit(&dt2); array = malloc(2*sizeof(float**)); for (i = 0;i<2;i++){ array[i] = malloc((2+size)*sizeof(float*)); array[i][0] = malloc(((2+size)*(2+size))*sizeof(float)); for (x = 1;x<2+size;x++){ array[i][x] = &(array[i][0][x*(2+size)]); } } for (z=0; z<2; z++){ for (x=0; x<2+size; x++){ for (y=0; y<2+size; y++){ array[z][x][y] = 0.0; } } } z = 0; inidat(NXPROB,NYPROB,array[z],size*(taskid/t_sqrt),size*(taskid%t_sqrt),size); for (i = 1; i <= STEPS; i++) { if (taskid/t_sqrt != 0) //not first row { MPI_Isend(array[z][1],1,dt,taskid-t_sqrt,100, MPI_COMM_WORLD, &req); MPI_Irecv(array[z][0],1,dt,taskid-t_sqrt,100, MPI_COMM_WORLD, &req2); } if (taskid/t_sqrt != t_sqrt-1) //not last row { MPI_Isend(array[z][size],1,dt,taskid+t_sqrt,100, MPI_COMM_WORLD, &req); MPI_Irecv(array[z][size+1],1,dt,taskid+t_sqrt,100, MPI_COMM_WORLD, &req3); } if(taskid%t_sqrt 
!= 0) //not last column { MPI_Isend(&array[z][0][1],1,dt2,taskid-1,100, MPI_COMM_WORLD, &req); MPI_Irecv(&array[z][0][0],1,dt2,taskid-1,100, MPI_COMM_WORLD, &req4); } if(taskid%t_sqrt != t_sqrt-1) //not last column { MPI_Isend(&array[z][0][size],1,dt2,taskid+1,100, MPI_COMM_WORLD, &req); MPI_Irecv(&array[z][0][size+1],1,dt2,taskid+1,100, MPI_COMM_WORLD, &req5); } inner_update(size,array[z],array[1-z]); if (taskid/t_sqrt != 0) MPI_Wait(&req2,&status); if (taskid/t_sqrt != t_sqrt-1) MPI_Wait(&req3,&status); if(taskid%t_sqrt != 0) MPI_Wait(&req4,&status); if(taskid%t_sqrt != t_sqrt-1) MPI_Wait(&req5,&status); outer_update(size,taskid,t_sqrt,array[z],array[1-z]); z = 1-z; } if (taskid == 0){ t2 = MPI_Wtime(); printf("MPI_Wtime measured: %1.2f\n", t2-t1); } for (i = 0;i<2;i++){ free(array[i][0]); free(array[i]); } free(array); MPI_Type_free(&dt); MPI_Type_free(&dt2); MPI_Finalize(); }
/* HPL_sdrv summary: when both RCOUNT and SCOUNT are positive the code posts MPI_Irecv, performs a blocking MPI_Send and then MPI_Wait; when only RCOUNT is positive it does a blocking MPI_Recv; when only SCOUNT is positive it does a blocking MPI_Send. Under HPL_USE_MPI_DATATYPE each message is transferred as one element of a committed MPI_Type_contiguous datatype, which is released with MPI_Type_free only if every preceding call returned MPI_SUCCESS. NOTE(review): the definition appears to be cut off after the last MPI_Type_free call (unbalanced ')' and no closing brace or return statement is visible) - confirm against the complete source before editing. */
int HPL_sdrv ( double * SBUF, int SCOUNT, int STAG, double * RBUF, int RCOUNT, int RTAG, int PARTNER, MPI_Comm COMM ) { /* * Purpose * ======= * * HPL_sdrv is a simple wrapper around MPI_Sendrecv. Its main purpose is * to allow for some experimentation and tuning of this simple function. * Messages of length less than or equal to zero are not sent nor * received. Successful completion is indicated by the returned error * code HPL_SUCCESS. * * Arguments * ========= * * SBUF (local input) double * * On entry, SBUF specifies the starting address of buffer to be * sent. * * SCOUNT (local input) int * On entry, SCOUNT specifies the number of double precision * entries in SBUF. SCOUNT must be at least zero. * * STAG (local input) int * On entry, STAG specifies the message tag to be used for the * sending communication operation. * * RBUF (local output) double * * On entry, RBUF specifies the starting address of buffer to be * received. * * RCOUNT (local input) int * On entry, RCOUNT specifies the number of double precision * entries in RBUF. RCOUNT must be at least zero. * * RTAG (local input) int * On entry, RTAG specifies the message tag to be used for the * receiving communication operation. * * PARTNER (local input) int * On entry, PARTNER specifies the rank of the collaborative * process in the communication space defined by COMM. * * COMM (local input) MPI_Comm * The MPI communicator identifying the communication space. * * --------------------------------------------------------------------- */ START_TRACE( SDRV ) /* * .. Local Variables .. */ #ifdef HPL_USE_MPI_DATATYPE MPI_Datatype type[2]; #endif MPI_Request request; MPI_Status status; int ierr; /* .. * .. Executable Statements .. 
*/ if( RCOUNT > 0 ) { if( SCOUNT > 0 ) { #ifdef HPL_USE_MPI_DATATYPE /* * Post asynchronous receive */ ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); if( ierr == MPI_SUCCESS ) { ierr = MPI_Type_commit( &type[0] ); } if( ierr == MPI_SUCCESS ) { ierr = MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER, RTAG, COMM, &request ); } /* * Blocking send */ if( ierr == MPI_SUCCESS ) { ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); } if( ierr == MPI_SUCCESS ) { ierr = MPI_Type_commit( &type[1] ); } if( ierr == MPI_SUCCESS ) { ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG, COMM ); } if( ierr == MPI_SUCCESS ) { ierr = MPI_Type_free( &type[1] ); } /* * Wait for the receive to complete */ if( ierr == MPI_SUCCESS ) { ierr = MPI_Wait( &request, &status ); } if( ierr == MPI_SUCCESS ) { ierr = MPI_Type_free( &type[0] ); } #else /* * Post asynchronous receive */ ierr = MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, PARTNER, RTAG, COMM, &request ); /* * Blocking send */ if( ierr == MPI_SUCCESS ) { ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER, STAG, COMM ); } /* * Wait for the receive to complete */ if( ierr == MPI_SUCCESS ) { ierr = MPI_Wait( &request, &status ); } #endif } else { /* * Blocking receive */ #ifdef HPL_USE_MPI_DATATYPE ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); if( ierr == MPI_SUCCESS ) { ierr = MPI_Type_commit( &type[0] ); } if( ierr == MPI_SUCCESS ) { ierr = MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG, COMM, &status ); } if( ierr == MPI_SUCCESS ) { ierr = MPI_Type_free( &type[0] ); } #else ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, PARTNER, RTAG, COMM, &status ); #endif } } else if( SCOUNT > 0 ) { /* * Blocking send */ #ifdef HPL_USE_MPI_DATATYPE ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); if( ierr == MPI_SUCCESS ) { ierr = MPI_Type_commit( &type[1] ); } if( ierr == MPI_SUCCESS ) { ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG, COMM ); } if( ierr == 
MPI_SUCCESS ) { ierr = MPI_Type_free( &type[1] ) ); }
/**
 * Exchange entries of direc1 with every neighbouring partition.
 *
 * For neighbour n the values direc1[send_list[n][0..send_count[n])] are
 * packed and sent, and recv_count[n] values are received and scattered to
 * direc1[recv_list[n][..]]. The peer rank of neighbour n is looked up as
 * epart[local_global_index[recv_list[n][0]]] for both directions.
 *
 * All scratch memory is allocated before any message is posted, so an
 * allocation failure can release everything without leaving active MPI
 * requests that reference freed buffers.
 *
 * @return 0 on success, -1 if memory could not be allocated.
 */
static int exchange_direc1( double *direc1, int *local_global_index, idx_t *epart, int neighbors_count,
                            int *send_count, int **send_list, int *recv_count, int **recv_list )
{
    MPI_Request *send_request = NULL;
    MPI_Request *recv_request = NULL;
    double **send_buf = NULL;
    double **recv_buf = NULL;
    int n, i;
    int rc = -1;

    if( neighbors_count == 0 ) {
        return 0;                       /* nothing to exchange */
    }
    if( neighbors_count < 0 ) {
        return -1;
    }

    send_request = malloc( neighbors_count * sizeof *send_request );
    recv_request = malloc( neighbors_count * sizeof *recv_request );
    send_buf = malloc( neighbors_count * sizeof *send_buf );
    recv_buf = malloc( neighbors_count * sizeof *recv_buf );
    if( send_request == NULL || recv_request == NULL || send_buf == NULL || recv_buf == NULL ) {
        goto free_arrays;
    }

    /* make the per-neighbour slots safe to free() on the error path */
    for( n = 0; n < neighbors_count; n++ ) {
        send_buf[n] = NULL;
        recv_buf[n] = NULL;
    }

    for( n = 0; n < neighbors_count; n++ ) {
        /* request at least one element so that an empty message is not
         * mistaken for an allocation failure when malloc(0) returns NULL */
        size_t send_elems = send_count[n] > 0 ? (size_t) send_count[n] : 1;
        size_t recv_elems = recv_count[n] > 0 ? (size_t) recv_count[n] : 1;

        send_buf[n] = malloc( send_elems * sizeof **send_buf );
        recv_buf[n] = malloc( recv_elems * sizeof **recv_buf );
        if( send_buf[n] == NULL || recv_buf[n] == NULL ) {
            goto free_bufs;
        }
    }

    // Start sending
    for( n = 0; n < neighbors_count; n++ ) {
        for( i = 0; i < send_count[n]; i++ ) {
            send_buf[n][i] = direc1[send_list[n][i]];
        }
        int dest = epart[local_global_index[recv_list[n][0]]];
        MPI_Isend( send_buf[n], send_count[n], MPI_DOUBLE, dest, 0, MPI_COMM_WORLD, &send_request[n] );
    }

    // Start receiving
    for( n = 0; n < neighbors_count; n++ ) {
        int source = epart[local_global_index[recv_list[n][0]]];
        MPI_Irecv( recv_buf[n], recv_count[n], MPI_DOUBLE, source, 0, MPI_COMM_WORLD, &recv_request[n] );
    }

    // Wait for data to be received, then scatter it into direc1
    for( n = 0; n < neighbors_count; n++ ) {
        MPI_Wait( &recv_request[n], MPI_STATUS_IGNORE );
        for( i = 0; i < recv_count[n]; i++ ) {
            direc1[recv_list[n][i]] = recv_buf[n][i];
        }
    }

    // Wait for data to be sent; the send buffers may only be freed afterwards
    for( n = 0; n < neighbors_count; n++ ) {
        MPI_Wait( &send_request[n], MPI_STATUS_IGNORE );
    }

    rc = 0;

free_bufs:
    for( n = 0; n < neighbors_count; n++ ) {
        free( send_buf[n] );
        free( recv_buf[n] );
    }
free_arrays:
    free( recv_buf );
    free( send_buf );
    free( recv_request );
    free( send_request );
    return rc;
}
int main (int argc, char *argv[]) { int numtasks, rank, buf, tag1=1, i, rc, dest, src, offset, nreqs; double T1, T2; MPI_Request reqs[REPS*2]; MPI_Status stats[REPS*2]; MPI_Init(&argc,&argv); MPI_Comm_size(COMM, &numtasks); MPI_Comm_rank(COMM, &rank); /* Require 4 tasks */ if (rank == 0 ) { if (numtasks != 4) { printf("ERROR: Number of tasks must be 4. Quitting.\n"); MPI_Abort(COMM, rc); } printf("Starting isend/irecv send/irecv test...\n"); } /* Use barriers for clean output */ MPI_Barrier(COMM); printf("Task %d starting...\n", rank); MPI_Barrier(COMM); T1 = MPI_Wtime(); /* start the clock */ /* Tasks 0 and 1 do the isend/irecv test. * Determine who to send/receive with. nreqs specifies how many non-blocking * operation request handles to capture. offset is where the task should * store each request as it is captured in the reqs() array. */ if (rank < 2) { nreqs = REPS*2; if (rank == 0) { src = 1; offset = 0; } if (rank == 1) { src = 0; offset = 0; } dest = src; /* Do the non-blocking send and receive operations */ for (i=0; i<REPS; i++) { MPI_Isend(&rank, 1, MPI_INT, dest, tag1, COMM, &reqs[offset]); MPI_Irecv(&buf, 1, MPI_INT, src, tag1, COMM, &reqs[offset+1]); offset += 2; if ((i+1)%DISP == 0) printf("Task %d has done %d isends/irecvs\n", rank, i+1); } } /* Tasks 2 and 3 do the send/irecv test. Determine who to send/receive with. nreqs specifies how many non-blocking operation request handles to capture. offset is where the task should store each request as it is captured in the reqs() array. 
*/ if (rank > 1) { nreqs = REPS; /* Task 2 does the blocking send operation */ if (rank == 2) { dest = 3; for (i=0; i<REPS; i++) { MPI_Send(&rank, 1, MPI_INT, dest, tag1, COMM); if ((i+1)%DISP == 0) printf("Task %d has done %d sends\n", rank, i+1); } } /* Task 3 does the non-blocking receive operation */ if (rank == 3) { src = 2; offset = 0; for (i=0; i<REPS; i++) { MPI_Irecv(&buf, 1, MPI_INT, src, tag1, COMM, &reqs[offset]); offset += 1; if ((i+1)%DISP == 0) printf("Task %d has done %d irecvs\n", rank, i+1); } } } /* Wait for all non-blocking operations to complete and record time */ MPI_Waitall(nreqs, reqs, stats); T2 = MPI_Wtime(); /* end time */ MPI_Barrier(COMM); printf("Task %d time(wall)= %lf sec\n", rank, T2-T1); MPI_Finalize(); }
int main(int argc, char* argv[]) { int nprocs, myid; int nrl; // Initialize MPI. MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &myid); printf("This is process %d out of %d processes.\n",myid,nprocs); // Exit if the number of arguments is not 5. if (argc != 6) { printf("Usage: laplace nrows ncols niter iprint relerr\n"); exit(1); } // Process the command line arguments. int nr=atoi(argv[1]); int nc=atoi(argv[2]); int niter=atoi(argv[3]); int iprint=atoi(argv[4]); double relerr=atof(argv[5]); int i,j,iter; // Get some timing information. double time1=MPI_Wtime(); // Cap the number of iterations. if (niter >= MAXITER) { printf("Warnig: Changed the number of iterations to %d.",MAXITER); niter=MAXITER; } if (nr%nprocs != 0) { if (myid == 0) printf("Error: the number of rows is not multiple of nprocs!\n"); MPI_Finalize(); exit(2); } nrl=nr/nprocs; printf("Process %d has row %d through %d\n",myid,myid*nrl+1,(myid+1)*nrl); double **t; t=malloc((nrl+2)*sizeof(double *)); for (i=0;i<nrl+2;i++) t[i]=malloc((nc+2)*sizeof(double *)); double **told; told=malloc((nrl+2)*sizeof(double *)); for (i=0;i<nrl+2;i++) told[i]=malloc((nc+2)*sizeof(double *)); // Initialize the array. for (i=0; i<nrl+2; i++) for (j=0; j<nc+2; j++) told[i][j]=0.; // Set the boundary condition. // Right boundary double tmin=myid*100.0/nprocs; double tmax=(myid+1)*100.0/nprocs; for (i=0;i<nrl+2;i++) { t[i][nc+1]=tmin+((tmax-tmin)/nrl)*i; told[i][nc+1]=t[i][nc+1]; } // Bottom boundary if (myid == nprocs-1) { for (j=0;j<nc+2;j++) { t[nrl+1][j]=(100.0/nc)*j; told[nrl+1][j]=t[nrl+1][j]; } } // Main loop. int tagu=100,tagd=101; MPI_Request reqidu,reqidd; MPI_Status status; double dt,dtg; for (iter=1;iter<=niter;iter++) { for (i=1;i<=nrl;i++) for (j=1;j<=nc;j++) t[i][j]=0.25*(told[i+1][j]+told[i-1][j]+told[i][j-1]+told[i][j+1]); // Update the boundary data. We'll send above and receive from // below first, then send below and receive from above. 
In both // cases, we'll post the receives first, and then do the sends. // Send U - Recv D: if (myid != nprocs-1) MPI_Irecv(t[nrl+1],nc+2,MPI_DOUBLE,myid+1,tagu,MPI_COMM_WORLD,&reqidu); if (myid != 0) MPI_Send(t[1],nc+2,MPI_DOUBLE,myid-1,tagu,MPI_COMM_WORLD); if (myid != nprocs-1) MPI_Wait(&reqidu,&status); // Send D - Recv U: if (myid != 0) MPI_Irecv(t[0],nc+2,MPI_DOUBLE,myid-1,tagd,MPI_COMM_WORLD,&reqidd); if (myid != nprocs-1) MPI_Send(t[nrl],nc+2,MPI_DOUBLE,myid+1,tagd,MPI_COMM_WORLD); if (myid != 0) MPI_Wait(&reqidd,&status); // Check on convergence, and move current values to old dt=0; for (i=1;i<=nrl;i++) { for (j=1;j<=nc;j++) { dt=fmax(fabs(t[i][j]-told[i][j]),dt); told[i][j]=t[i][j]; } } // blank 1: Find the global max convergence error with one MPI function call. // Check if output is required. if (myid == 0) if (iprint != 0) if (iter%iprint == 0) printf("Iteration: %d; Convergence Error: %f\n",iter,dtg); // Check if convergence criteria meet. if (dtg < relerr) { printf("\nSolution has converged.\n"); break; } } free(t); free(told); // Print out the execution time. if (myid == 0) { double time2=MPI_Wtime(); printf("\nTotal Time (sec): %f.\n",time2-time1); } MPI_Finalize(); return 0; }
/* Synthetic workload driver. Options parsed below: -v (verbose flag), -n <count> (nflop), -N <count> (nrep), -d <count> (ndata and ndata_max), -m <MiB> (nmem), -s <seconds> (nsleep), -spray, -c, -r, -stair, -stair_region, -nompi (clears DOMPI, which is set by default). Phases, in order: optional PAPI counter start (HPM); optional MPI_Init; memory fill loop when nmem is non-zero; multiply loop when nflop is non-zero; sleep when nsleep is non-zero; then, when DOMPI is set, a fixed sequence of broadcasts, ring MPI_Sendrecv calls, reductions, point-to-point exchanges with rank "you", an all-pairs one-double Isend/Irecv with tag 4, and repeated Allreduce/Reduce groups that are bracketed by MPI_Pcontrol regions a..i when IPM and the REGION flag are active; finally MPI_Barrier/MPI_Finalize and optional PAPI counter stop. NOTE(review): rank and size are only assigned inside the DOMPI branch but are read in the IPM region checks and the STAIR_RANK computation - confirm behaviour with -nompi. NOTE(review): option values are taken from the next argv entry without checking that one exists. */
int main(int argc, char **argv) { int size,rank, left, right, you, ndata=127,ndata_max=127,seed; int rv; long long int i,j,k; unsigned long long int nflop=0,nmem=1,nsleep=0,nrep=1, myflops; char *env_ptr; double *sbuf, *rbuf,*x; MPI_Status *s; MPI_Request *r; time_t ts; #ifdef HPM if((rv = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) { fprintf(stderr, "Error: %d %s\n",rv, errstring); exit(1); } if ((num_hwcntrs = PAPI_num_counters()) < PAPI_OK) { printf("There are no counters available. \n"); exit(1); } if ( (rv = PAPI_start_counters(events, 2)) != PAPI_OK) { fprintf(stdout, "ERROR PAPI_start_counters rv=%d\n", rv); exit(rv); } #endif /* seed from the wall clock; MPI is enabled unless -nompi is given */ seed = time(&ts); flags |= DOMPI; while(--argc && argv++) { if(!strcmp("-v",*argv)) { flags |= DOVERBOSE; } else if(!strcmp("-n",*argv)) { --argc; argv++; nflop = atol(*argv); } else if(!strcmp("-N",*argv)) { --argc; argv++; nrep = atol(*argv); } else if(!strcmp("-d",*argv)) { --argc; argv++; ndata_max = ndata = atol(*argv); } else if(!strcmp("-m",*argv)) { --argc; argv++; nmem = atol(*argv); } else if(!strcmp("-s",*argv)) { --argc; argv++; nsleep = atol(*argv); } else if(!strcmp("-spray",*argv)) { flags |= DOSPRAY; } else if(!strcmp("-c",*argv)) { flags |= CORE; } else if(!strcmp("-r",*argv)) { flags |= REGION; } else if(!strcmp("-stair",*argv)) { flags |= STAIR_RANK; } else if(!strcmp("-stair_region",*argv)) { flags |= STAIR_REGION; } else if(!strcmp("-nompi",*argv)) { flags &= ~DOMPI; } } if(flags & DOMPI) { MPI_Init(&argc,&argv); /* MPI_Init(&argc,&argv); */ MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); } /* memory phase: nmem is converted from MiB to a count of doubles; the buffer is filled nrep times and is not freed (the free is disabled by if(0)) */ if(nmem) { nmem = (nmem*1024*1024/sizeof(double)); x = (double *)malloc((size_t)(nmem*sizeof(double))); for(j=0;j<nrep;j++) { for(i=0;i<nmem;i++) { x[i] = i; } for(i=0;i<nmem;i++) { x[i] = i*x[i]; } if(x[nmem-1]*x[nmem-1] < 0) { printf("trickster\n"); } } if(0) free((char *)x); } #ifdef IPM if(flags & REGION && rank > -1 ) MPI_Pcontrol(1,"region_zzzzzzzzzzzZz"); #endif if(nflop) { x = 
(double *)malloc((size_t)(10*sizeof(double))); j = k = 0; for(i=0;i<10;i++) { x[i] = 1.0; } if(flags & STAIR_RANK) { myflops = (rank*nflop)/size; } else { myflops = nflop; } for(i=0;i<nflop;i++) { x[j] = x[j]*x[k]; j = ((i%9)?(j+1):(0)); k = ((i%8)?(k+1):(0)); } free((char *)x); } if(nsleep) { sleep(nsleep); } #ifdef IPM if(flags & REGION && rank > -1 ) MPI_Pcontrol(-1,"region_zzzzzzzzzzzZz"); #endif if(nmem<nflop) nmem=nflop; if(nflop>1) printf("FLOPS = %lld BYTES = %lld\n", nflop, nmem); fflush(stdout); /* NOTE(review): unbounded loop indexing x far outside any allocation (x may already be freed above); this looks like a deliberate crash for the -c option - confirm */ if(flags & CORE) { for(i=0;;i++) { x[i] = x[i*i-1000]; } } env_ptr = getenv("IPM_SOCKET"); if(env_ptr) { printf("IPM: %d IPM_SOCKET in app %s\n", rank, env_ptr); } if(flags & DOMPI) { s = (MPI_Status *)malloc((size_t)(sizeof(MPI_Status)*2*size)); r = (MPI_Request *)malloc((size_t)(sizeof(MPI_Request)*2*size)); sbuf = (double *)malloc((size_t)(ndata_max*sizeof(double))); rbuf = (double *)malloc((size_t)(ndata_max*sizeof(double))); for(i=0;i<ndata_max;i++) { sbuf[i] = rbuf[i] = i; } MPI_Bcast(&seed,1,MPI_INT,0,MPI_COMM_WORLD); srand48(seed); for(i=0;i<nrep;i++) { MPI_Bcast(sbuf,ndata_max,MPI_DOUBLE,0,MPI_COMM_WORLD); } if(size>1) { if(!rank) {left=size-1;} else { left = rank-1;} if(rank == size-1) { right=0;} else {right=rank+1;} you = (rank < size/2)?(rank+size/2):(rank-size/2); for(i=0;i<nrep;i++) { if(flags & DOSPRAY) { ndata = (long int)(drand48()*ndata_max)+1; } MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,right,1,rbuf,ndata,MPI_DOUBLE,left,1,MPI_COMM_WORLD,s); MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s); #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_a"); #endif MPI_Barrier(MPI_COMM_WORLD); MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Isend(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD, r); 
MPI_Recv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD, s); MPI_Wait(r,s); MPI_Irecv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD,r); MPI_Send(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD); MPI_Wait(r,s); for(j=0;j<size;j++) { MPI_Isend(sbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD, r+j); MPI_Irecv(rbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD,r+size+j); } MPI_Waitall(2*size,r,s); /* for(j=0;j<size;j++) { printf("rep %d stat %d %d %d\n",i, j, s[j].MPI_SOURCE, s[j+size].MPI_SOURCE); } */ #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_a"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_b"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_b"); #endif if(1) { #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_c"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_c"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_d"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) 
MPI_Pcontrol(-1,"region_d"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_e"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_e"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_f"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_f"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_g"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_g"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_h"); #endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_h"); #endif #ifdef IPM if(flags & REGION) MPI_Pcontrol(1,"region_i"); 
#endif MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1, MPI_COMM_WORLD); #ifdef IPM if(flags & REGION) MPI_Pcontrol(-1,"region_i"); #endif } } } MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); } #ifdef HPM if ((rv=PAPI_stop_counters(values, 2)) != PAPI_OK) { fprintf(stdout, "ERROR PAPI_stop_counters rv=%d\n", rv); exit(rv); } printf("PAPI: total instruction/cycles %lld/%lld %.3e \n", values[0], values[1], values[0]/(values[1]*1.0) ); #endif return 0; }
/*
 * Start the halo exchange for the data set referenced by arg.
 *
 * Nothing is posted unless the argument is accessed as OP_READ or OP_RW and
 * the dat is marked dirty. Otherwise, for the exec lists and then the
 * nonexec lists of the dat's set:
 *   - exported elements are packed into the dat's MPI buffer and sent with
 *     MPI_Isend (handles appended to s_req),
 *   - MPI_Irecv calls are posted that write straight into dat->data behind
 *     the locally owned elements (handles appended to r_req).
 * Finally the dirty bit is cleared and arg->sent is set to 1. The function
 * does not wait for any of the requests it posts.
 */
void op_exchange_halo(op_arg* arg)
{
  op_dat dat = arg->dat;

  if (arg->sent == 1) {
    printf("Error: Halo exchange already in flight for dat %s\n", dat->name);
    fflush(stdout);
    MPI_Abort(OP_MPI_WORLD, 2);
  }

  /* only dirty data that is going to be read has to be exchanged */
  int is_read_access = (arg->acc == OP_READ || arg->acc == OP_RW);
  if (!is_read_access || dat->dirtybit != 1) {
    return;
  }

  halo_list imp_exec_list = OP_import_exec_list[dat->set->index];
  halo_list imp_nonexec_list = OP_import_nonexec_list[dat->set->index];
  halo_list exp_exec_list = OP_export_exec_list[dat->set->index];
  halo_list exp_nonexec_list = OP_export_nonexec_list[dat->set->index];

  op_mpi_buffer mpi_buf = (op_mpi_buffer)(dat->mpi_buffer);
  int r, e;

  /* ---------------- exec elements ---------------- */

  /* sanity checks */
  if (compare_sets(imp_exec_list->set, dat->set) == 0) {
    printf("Error: Import list and set mismatch\n");
    MPI_Abort(OP_MPI_WORLD, 2);
  }
  if (compare_sets(exp_exec_list->set, dat->set) == 0) {
    printf("Error: Export list and set mismatch\n");
    MPI_Abort(OP_MPI_WORLD, 2);
  }

  for (r = 0; r < exp_exec_list->ranks_size; r++) {
    int first = exp_exec_list->disps[r];

    /* gather the exported elements for this rank into one contiguous run */
    for (e = 0; e < exp_exec_list->sizes[r]; e++) {
      int elem = exp_exec_list->list[first + e];
      memcpy(&mpi_buf->buf_exec[first * dat->size + e * dat->size],
             (void *)&dat->data[dat->size * (elem)],
             dat->size);
    }

    MPI_Isend(&mpi_buf->buf_exec[first * dat->size],
              dat->size * exp_exec_list->sizes[r],
              MPI_CHAR, exp_exec_list->ranks[r],
              dat->index, OP_MPI_WORLD,
              &mpi_buf->s_req[mpi_buf->s_num_req++]);
  }

  /* imported exec elements are stored right after the owned elements */
  int exec_base = dat->set->size * dat->size;
  for (r = 0; r < imp_exec_list->ranks_size; r++) {
    MPI_Irecv(&(dat->data[exec_base + imp_exec_list->disps[r] * dat->size]),
              dat->size * imp_exec_list->sizes[r],
              MPI_CHAR, imp_exec_list->ranks[r],
              dat->index, OP_MPI_WORLD,
              &mpi_buf->r_req[mpi_buf->r_num_req++]);
  }

  /* ---------------- nonexec elements ---------------- */

  /* sanity checks */
  if (compare_sets(imp_nonexec_list->set, dat->set) == 0) {
    printf("Error: Non-Import list and set mismatch");
    MPI_Abort(OP_MPI_WORLD, 2);
  }
  if (compare_sets(exp_nonexec_list->set, dat->set) == 0) {
    printf("Error: Non-Export list and set mismatch");
    MPI_Abort(OP_MPI_WORLD, 2);
  }

  for (r = 0; r < exp_nonexec_list->ranks_size; r++) {
    int first = exp_nonexec_list->disps[r];

    for (e = 0; e < exp_nonexec_list->sizes[r]; e++) {
      int elem = exp_nonexec_list->list[first + e];
      memcpy(&mpi_buf->buf_nonexec[first * dat->size + e * dat->size],
             (void *)&dat->data[dat->size * (elem)],
             dat->size);
    }

    MPI_Isend(&mpi_buf->buf_nonexec[first * dat->size],
              dat->size * exp_nonexec_list->sizes[r],
              MPI_CHAR, exp_nonexec_list->ranks[r],
              dat->index, OP_MPI_WORLD,
              &mpi_buf->s_req[mpi_buf->s_num_req++]);
  }

  /* imported nonexec elements follow the imported exec elements */
  int nonexec_base = (dat->set->size + imp_exec_list->size) * dat->size;
  for (r = 0; r < imp_nonexec_list->ranks_size; r++) {
    MPI_Irecv(&(dat->data[nonexec_base + imp_nonexec_list->disps[r] * dat->size]),
              dat->size * imp_nonexec_list->sizes[r],
              MPI_CHAR, imp_nonexec_list->ranks[r],
              dat->index, OP_MPI_WORLD,
              &mpi_buf->r_req[mpi_buf->r_num_req++]);
  }

  /* clear dirty bit and remember that an exchange is in flight */
  dat->dirtybit = 0;
  arg->sent = 1;
}
int member(int rank, int nprocs) { int lb0src, lb1src, lb2src, ub0src, ub1src, ub2src; int arraySize_X_src, arraySize_Y_src, arraySize_Z_src; int lb0dest, lb1dest, lb2dest, ub0dest, ub1dest, ub2dest; int arraySize_X_dest, arraySize_Y_dest, arraySize_Z_dest; int i,j,k; MPI_Bcast( &lb0src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &lb1src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &lb2src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub0src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub1src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub2src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_X_src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_Y_src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_Z_src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &lb0dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &lb1dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &lb2dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub0dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub1dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub2dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_X_dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_Y_dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_Z_dest, 1, MPI_INT, 0, MPI_COMM_WORLD); double *distsrc; double *distdest; int nghost = 1; int distdestsize = arraySize_Z_dest / nprocs; if(rank < arraySize_Z_dest%nprocs) { distdestsize++; } distsrc = (double*)calloc(1,sizeof(double)*(distdestsize+2*nghost)*arraySize_X_src*arraySize_Y_src); distdest = (double*)calloc(1,sizeof(double)*(distdestsize)*arraySize_X_dest*arraySize_Y_dest); //cout << "Rank dest: " << rank << " " << offsetdest << " " << distdestsize << " " << arraySize_Z_dest << endl; // receive data from master int src = 0, recv_tag=1; MPI_Request recv_reqs[1]; MPI_Status recv_status[1]; int recvSize = (distdestsize + 2 * nghost) * arraySize_X_src*arraySize_Y_src; // receiving data MPI_Irecv(distsrc, recvSize, MPI_DOUBLE, src, recv_tag, 
MPI_COMM_WORLD, &recv_reqs[0]); // MPI_Irecv(distdest, distsrcsize*bxsrc.size(0)*bxsrc.size(1), MPI_DOUBLE, src, tag, MPI_COMM_WORLD, &reqs[1]); MPI_Waitall(1,recv_reqs,recv_status); // int idx; // for(idx = 0; idx < distsrcsize*bxsrc.size(0)*bxsrc.size(1); ++idx) // printf("Receiver:%d has result %d: %f\n",rank, idx, distsrc[idx]); // computation for (k = lb2dest; k < distdestsize; ++k) { for (j = lb1dest; j <= ub1dest; ++j) { for (i = lb0dest; i <= ub0dest; ++i) { distdest[arraySize_X_dest * (arraySize_Y_dest * k + j) + i] = \ distsrc[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + -1) + (j-lb1src)) + (i-lb0src)] + \ distsrc[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + 1) + (j-lb1src)) + (i-lb0src)] + \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + -1)) + (i-lb0src)] + \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + 1)) + (i-lb0src)] + \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + -1)] + \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + 1)] + \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + (i-lb0src)] * -6.00000; #if debug cout << "rank" << rank << " " << i << " " << j << " " << k << " " << distdest[arraySize_X_dest * (arraySize_Y_dest * k + j) + i] << "= " << \ distsrc[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + -1) + (j-lb1src)) + (i-lb0src)] << "+" << \ distsrc[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + 1) + (j-lb1src)) + (i-lb0src)] << "+" << \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + -1)) + (i-lb0src)] << "+" << \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + 1)) + (i-lb0src)] << "+" << \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + -1)] << "+" << \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + 1)] << "+" << \ distsrc[arraySize_X_src * 
(arraySize_Y_src * (k-lb2src) + (j-lb1src)) + (i-lb0src)] << "* -6.00000" << endl; #endif } //cout << rank<<": end of j = " << j << endl; } //cout << "end of k = " << k << endl; } int dest = 0, send_tag=1; MPI_Request send_reqs[1]; MPI_Status send_status[1]; // team members send back data MPI_Isend(distdest, distdestsize*arraySize_X_dest*arraySize_Y_dest, MPI_DOUBLE, dest, send_tag, MPI_COMM_WORLD, send_reqs); // int idx; // for(idx = 0; idx < distdestsize*arraySize_X_dest*arraySize_Y_dest; ++idx) // printf("rank %d send result %d: %f\n",rank, idx, distdest[idx]); MPI_Waitall(1,send_reqs,send_status); return 0; }
/* slave 进程 */ void worker() { printf("\tProcessor %d at %s begin work..\n", myid, processor_name); MPI_Status status; MPI_Request handle; int recv_flag = 0; int count = 0; int upload = 0; // 非阻塞接收主进程消息 MPI_Irecv(selectedGenes, n, MPI_GENETYPE, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &handle); while(1) { // 独立繁衍count代 count = generations; while(count--) { select(); crossover(); mutate(); evaluate(); prefer(); // 若满足终止条件,则向主进程发送最优路径,并结束进程 if(population[CARDINALITY].fitness <= optimal+margin) { printf("\tProcessor %d at %s Terminated\n", myid, processor_name); MPI_Send(&population[CARDINALITY], 1, MPI_GENETYPE, 0, DONE_TAG, MPI_COMM_WORLD); printf("\tProcessor %d at %s exit\n", myid, processor_name); return; } // 探测是否收到主进程的消息 MPI_Test(&handle, &recv_flag, &status); // 若收到主进程的消息 if(recv_flag) { printf("\tProcessor %d at %s recv %d\n", myid, processor_name, status.MPI_TAG); // 状态重置 recv_flag = 0; // 若接收到DONE_TAG则结束进程 if(status.MPI_TAG == DONE_TAG) { printf("\tProcessor %d at %s exit\n", myid, processor_name); return; } // 否则,将接收到的优良个体替换种群中最差的个体 qsort(population, CARDINALITY, sizeof(GeneType), compare); for(int i=1; i <= n; i++) assign(&population[CARDINALITY-i], &selectedGenes[i-1]); if(selectedGenes[0].fitness < population[CARDINALITY].fitness) assign(&population[CARDINALITY], &selectedGenes[0]); // 非阻塞接收主进程消息 MPI_Irecv(selectedGenes, n, MPI_GENETYPE, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &handle); } } // 繁衍count代后,若没有终止则向主进程发送最优个体 select_N_best(n); MPI_Send(selectedGenes, n, MPI_GENETYPE, 0, PUT_BETTER_TAG, MPI_COMM_WORLD); printf("\tProcessor %d at %s upload %d\n", myid, processor_name, upload++); } }
int master(int rank, int nprocs) { const class Point zero = getZeros(); const class Point ones = getOnes(); const class Point negones = ones * -1; const class Point lo(zero); // DQ (2/7/2015): Fixup for error yet to be fixed in ROSE (or fixed on alternative branch not yet merged). // Point hi = getOnes()*(BLOCKSIZE-1); const int adjustedBlockSize = SIZE; const class Point hi = getOnes() * adjustedBlockSize; //box low and high corners for destination const class Box bxdest(lo,hi); // This will grow the box by one ghost // along each face and become the box for // the source box. const class Box bxsrc = bxdest . grow (1); // source and destination data containers class RectMDArray< double , 1 , 1 , 1 > Asrc(bxsrc); class RectMDArray< double , 1 , 1 , 1 > Adest(bxdest); // all the coefficients I need for this operation const double ident = 1.0; // DQ (2/18/2015): I need the simpler version because the current constant folding does not operate on floating point values. // const double C0 = -2.0 * DIM; const double C0 = -6.00000; initialize(Asrc); initialize(Adest); #if debug cout <<" The source Box" << endl; Asrc.print(); cout << endl; #endif // build the stencil, and the stencil operator // Stencil<double> laplace(wt,shft); const std::array< Shift , 3 > S = getShiftVec(); // This calls: template <class T> Stencil<T> operator*(T a_coef, Shift a_shift); class Stencil< double > laplace = C0*((S)^(zero)); for (int dir = 0; dir < 3; dir++) { const class Point thishft = getUnitv(dir); // DQ (2/15/2015): Added operator+=() to support clearer updates of an existing object for compile-time analysis. // laplace = laplace + ident*(S^thishft); // laplace = laplace + ident*(S^(thishft*(-1))); laplace += ident*((S)^(thishft)); laplace += ident*((S)^thishft * -1); } // laplace.stencilDump(); // StencilOperator<double,double, double> op; double begin = MPI_Wtime(); int lb2src = bxsrc . getLowCorner ()[2]; int k = 0; int ub2src = bxsrc . 
getHighCorner ()[2]; int arraySize_X_src = bxsrc . size (0); int lb1src = bxsrc . getLowCorner ()[1]; int j = 0; int ub1src = bxsrc . getHighCorner ()[1]; int arraySize_Y_src = bxsrc . size (1); int lb0src = bxsrc . getLowCorner ()[0]; int i = 0; int ub0src = bxsrc . getHighCorner ()[0]; int arraySize_Z_src = bxsrc . size (2); int lb2dest = bxdest . getLowCorner ()[2]; int ub2dest = bxdest . getHighCorner ()[2]; int arraySize_X_dest = bxdest . size (0); int lb1dest = bxdest . getLowCorner ()[1]; int ub1dest = bxdest . getHighCorner ()[1]; int arraySize_Y_dest = bxdest . size (1); int lb0dest = bxdest . getLowCorner ()[0]; int ub0dest = bxdest . getHighCorner ()[0]; int arraySize_Z_dest = bxdest . size (2); double *sourceDataPointer = Asrc . getPointer(); double *destinationDataPointer = Adest . getPointer(); for (k = lb2dest; k <= ub2dest; ++k) { for (j = lb1dest; j <= ub1dest; ++j) { for (i = lb0dest; i <= ub0dest; ++i) { destinationDataPointer[arraySize_X_dest * (arraySize_Y_dest * k + j) + i] = sourceDataPointer[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + -1) + (j-lb1src)) + (i-lb0src)] + sourceDataPointer[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + 1) + (j-lb1src)) + (i-lb0src)] + sourceDataPointer[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + -1)) + (i-lb0src)] + sourceDataPointer[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + 1)) + (i-lb0src)] + sourceDataPointer[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + -1)] + sourceDataPointer[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + 1)] + sourceDataPointer[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + (i-lb0src)] * -6.00000; #if debug cout << i << " " << j << " " << k << " " << destinationDataPointer[arraySize_X_dest * (arraySize_Y_dest * k + j) + i] << "= " << sourceDataPointer[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + -1) + (j-lb1src)) + (i-lb0src)] << "+" << 
sourceDataPointer[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + 1) + (j-lb1src)) + (i-lb0src)] << "+" << sourceDataPointer[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + -1)) + (i-lb0src)] << "+" << sourceDataPointer[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + 1)) + (i-lb0src)] << "+" << sourceDataPointer[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + -1)] << "+" << sourceDataPointer[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + 1)] << "+" << sourceDataPointer[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + (i-lb0src)] << "* -6.00000" << endl; #endif } } } double end = MPI_Wtime(); double elapsed_secs = (end - begin); cout << "Exec. time for serial code: " << elapsed_secs << endl; #if debug cout <<" The serail result" << endl; Adest.print(); cout << endl; #endif // real MPI in the following class RectMDArray< double , 1 , 1 , 1 > Adest_new(bxdest); initialize(Adest_new); double *destinationDataPointer_new = Adest_new . 
getPointer(); begin = MPI_Wtime(); MPI_Bcast( &lb0src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &lb1src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &lb2src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub0src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub1src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub2src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_X_src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_Y_src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_Z_src, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &lb0dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &lb1dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &lb2dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub0dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub1dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &ub2dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_X_dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_Y_dest, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast( &arraySize_Z_dest, 1, MPI_INT, 0, MPI_COMM_WORLD); double *distsrc; double *distdest; int nghost = 1; int distsrcsize = arraySize_Z_src / nprocs; int offsetsrc = rank * distsrcsize; if(rank < arraySize_Z_src%nprocs) { distsrcsize++; } if(rank >= arraySize_Z_src%nprocs) offsetsrc += arraySize_Z_src%nprocs; else offsetsrc += rank; int distdestsize = arraySize_Z_dest / nprocs; int offsetdest = rank * distdestsize; if(rank < arraySize_Z_dest%nprocs) { distdestsize++; } if(rank >= arraySize_Z_dest%nprocs) offsetdest += arraySize_Z_dest%nprocs; else offsetdest += rank; distsrc = (double*)calloc(1,sizeof(double)*(distdestsize+2*nghost)*arraySize_X_src*arraySize_Y_src); distdest = (double*)calloc(1,sizeof(double)*(distdestsize)*arraySize_X_dest*arraySize_Y_dest); // team leader send data to all members int copyOffset = offsetdest * arraySize_X_src*arraySize_Y_src; int copySize = (distdestsize + 2 * nghost) * arraySize_X_src*arraySize_Y_src; if(nprocs > 1) { int dest, send_tag=1; MPI_Request 
send_reqs[nprocs-1]; MPI_Status send_status[nprocs-1]; for(dest = 1; dest < nprocs; ++dest) { int sendSize = arraySize_Z_dest / nprocs; int sendOffset = dest * sendSize; if(dest < arraySize_Z_dest%nprocs) { sendSize++; } sendSize = (sendSize+2)*arraySize_X_src*arraySize_Y_src; if(dest >= arraySize_Z_dest%nprocs) sendOffset += arraySize_Z_dest%nprocs; else sendOffset += dest; sendOffset = sendOffset*arraySize_X_src*arraySize_Y_src; #if debug cout << "Master send size " << sendSize<< " from offset " << sendOffset << " " << " to " << dest << endl; #endif MPI_Isend(sourceDataPointer+sendOffset, sendSize,MPI_DOUBLE, dest, send_tag, MPI_COMM_WORLD,&send_reqs[dest-1]); // int idx; // for(idx = 0; idx < sendSize; ++idx) // printf("Source send to dest:%d result %d: %f\n",dest, idx, sourceDataPointer[offsetsrc+sendOffset+idx]); } MPI_Waitall(nprocs-1,send_reqs,send_status); } // local copy (this is optional, but simplier for transformation) memcpy(distsrc,sourceDataPointer+copyOffset,copySize*sizeof(double)); // computation for (k = lb2dest; k < distdestsize; ++k) { for (j = lb1dest; j <= ub1dest; ++j) { for (i = lb0dest; i <= ub0dest; ++i) { distdest[arraySize_X_dest * (arraySize_Y_dest * k + j) + i] = distsrc[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + -1) + (j-lb1src)) + (i-lb0src)] + \ distsrc[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + 1) + (j-lb1src)) + (i-lb0src)] + \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + -1)) + (i-lb0src)] + \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + 1)) + (i-lb0src)] + \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + -1)] + \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + 1)] + distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + (i-lb0src)] * -6.00000; #if debug cout << "rank0 " << i << " " << j << " " << k << " " << distdest[arraySize_X_dest * (arraySize_Y_dest * k + j) + i] 
<< "= " << \ distsrc[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + -1) + (j-lb1src)) + (i-lb0src)] << "+" << \ distsrc[arraySize_X_src * (arraySize_Y_src * ((k-lb2src) + 1) + (j-lb1src)) + (i-lb0src)] << "+" << \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + -1)) + (i-lb0src)] << "+" << \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + ((j-lb1src) + 1)) + (i-lb0src)] << "+" << \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + -1)] << "+" << \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + ((i-lb0src) + 1)] << "+" << \ distsrc[arraySize_X_src * (arraySize_Y_src * (k-lb2src) + (j-lb1src)) + (i-lb0src)] << "* -6.00000" << endl; #endif } } } // team leader receives data to all members int src, recv_tag=1; MPI_Request recv_reqs[nprocs-1]; MPI_Status recv_status[nprocs-1]; for(src = 1; src < nprocs; ++src) { int recvSize = arraySize_Z_dest / nprocs; int recvOffset = src * recvSize; if(src < arraySize_Z_dest%nprocs) { recvSize++; } recvSize *= arraySize_X_dest*arraySize_Y_dest; if(src >= arraySize_Z_dest%nprocs) recvOffset += arraySize_Z_dest%nprocs; else recvOffset += src; recvOffset = recvOffset*arraySize_X_dest*arraySize_Y_dest; MPI_Irecv(destinationDataPointer_new+recvOffset, recvSize, MPI_DOUBLE, src, recv_tag, MPI_COMM_WORLD,&recv_reqs[src-1]); } MPI_Waitall(nprocs-1,recv_reqs,recv_status); // local copy (this could be optional, but simplier for transformation) memcpy(destinationDataPointer_new+offsetdest,distdest,distdestsize*bxdest.size(0)*bxdest.size(1)*sizeof(double)); end = MPI_Wtime(); elapsed_secs = (end - begin); cout << "Exec. time for MPI code: " << elapsed_secs << endl; #if debug cout <<" MPI result " << endl; Adest_new.print(); cout << endl; #endif assert(checksum(Adest, Adest_new)==0); return 0; }
/*
 * Validate a datatype returned by one of the create_f90 routines.
 *
 *   str     - label used in diagnostics
 *   p, r    - precision / range the type was requested with
 *   f90kind - combiner the envelope is expected to report
 *   err     - return code of the creating call
 *   dtype   - datatype under test
 *
 * Checks, in order: the handle and return code, the envelope/contents, and
 * finally that one element of the type can be sent and received on
 * MPI_COMM_SELF (regression for tt#1028).  Each stage runs only when no
 * error has been counted so far.  Returns the number of errors.
 *
 * `err` and `errs` keep their names because the check_err() macro, defined
 * elsewhere, is invoked with only a function name and so must refer to them
 * implicitly.
 */
static int checkType(const char str[], int p, int r, int f90kind, int err, MPI_Datatype dtype)
{
    int errs = 0;

    if (dtype == MPI_DATATYPE_NULL) {
        printf("Unable to find a real type for (p=%d,r=%d) in %s\n", p, r, str);
        errs++;
    }
    if (err) {
        errs++;
        MTestPrintError(err);
    }
    if (errs)
        return errs;

    /* Stage 2: envelope and contents. */
    {
        int num_ints, num_addrs, num_types, kind;

        MPI_Type_get_envelope(dtype, &num_ints, &num_addrs, &num_types, &kind);
        if (kind != f90kind) {
            errs++;
            printf("Wrong combiner type (got %d, should be %d) for %s\n", kind, f90kind, str);
        } else {
            int values[2] = { 0, 0 };
            MPI_Datatype unused_type;

            if (num_types != 0) {
                errs++;
                printf
                    ("Section 8.6 states that the array_of_datatypes entry is empty for the create_f90 types\n");
            }
            MPI_Type_get_contents(dtype, 2, 0, 1, values, 0, &unused_type);

            if (kind == MPI_COMBINER_F90_REAL || kind == MPI_COMBINER_F90_COMPLEX) {
                /* real and complex types carry both p and r */
                if (num_ints != 2) {
                    errs++;
                    printf("Returned %d integer values, 2 expected for %s\n", num_ints, str);
                }
                if (values[0] != p || values[1] != r) {
                    errs++;
                    printf("Returned (p=%d,r=%d); expected (p=%d,r=%d) for %s\n",
                           values[0], values[1], p, r, str);
                }
            } else if (kind == MPI_COMBINER_F90_INTEGER) {
                /* integer types carry only one value */
                if (num_ints != 1) {
                    errs++;
                    printf("Returned %d integer values, 1 expected for %s\n", num_ints, str);
                }
                if (values[0] != p) {
                    errs++;
                    printf("Returned (p=%d); expected (p=%d) for %s\n", values[0], p, str);
                }
            } else {
                errs++;
                printf("Unrecognized combiner for %s\n", str);
            }
        }
    }
    if (errs)
        return errs;

    /* Stage 3: the type must be usable for communication. */
    {
        char outgoing[64];      /* big enough to hold any single type */
        char incoming[64];      /* big enough to hold any single type */
        MPI_Request pair[2];
        int type_bytes = 0;

        err = MPI_Type_size(dtype, &type_bytes);
        check_err(MPI_Type_size);
        assert(type_bytes <= sizeof(outgoing));
        memset(outgoing, 0, sizeof(outgoing));
        memset(incoming, 0, sizeof(incoming));
        if (errs)
            return errs;

        err = MPI_Isend(&outgoing, 1, dtype, 0, 42, MPI_COMM_SELF, &pair[0]);
        check_err(MPI_Isend);
        if (errs)
            return errs;

        err = MPI_Irecv(&incoming, 1, dtype, 0, 42, MPI_COMM_SELF, &pair[1]);
        check_err(MPI_Irecv);
        if (errs)
            return errs;

        err = MPI_Waitall(2, pair, MPI_STATUSES_IGNORE);
        check_err(MPI_Waitall);
    }
    return errs;
}
int main(int argc, char *argv[]) { int errs = 0; int rank, size, source, dest; MPI_Comm comm; MPI_Status status; MPI_Request req[4]; static int bufsizes[4] = { 1, 100, 10000, 1000000 }; char *bufs[4]; int flag, i; MTest_Init(&argc, &argv); comm = MPI_COMM_WORLD; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); source = 0; dest = size - 1; if (rank == source) { MPI_Send(MPI_BOTTOM, 0, MPI_CHAR, dest, 1, MPI_COMM_WORLD); } else if (rank == dest) { /* Create 3 requests to cancel, plus one to use. * Then receive one message and exit */ for (i = 0; i < 4; i++) { bufs[i] = (char *) malloc(bufsizes[i]); MPI_Irecv(bufs[i], bufsizes[i], MPI_CHAR, source, i, MPI_COMM_WORLD, &req[i]); } /* Now, cancel them in a more interesting order, to ensure that the * queue operation work properly */ MPI_Cancel(&req[2]); MPI_Wait(&req[2], &status); MTestPrintfMsg(1, "Completed wait on irecv[2]\n"); MPI_Test_cancelled(&status, &flag); if (!flag) { errs++; printf("Failed to cancel a Irecv[2] request\n"); fflush(stdout); } MPI_Cancel(&req[3]); MPI_Wait(&req[3], &status); MTestPrintfMsg(1, "Completed wait on irecv[3]\n"); MPI_Test_cancelled(&status, &flag); if (!flag) { errs++; printf("Failed to cancel a Irecv[3] request\n"); fflush(stdout); } MPI_Cancel(&req[0]); MPI_Wait(&req[0], &status); MTestPrintfMsg(1, "Completed wait on irecv[0]\n"); MPI_Test_cancelled(&status, &flag); if (!flag) { errs++; printf("Failed to cancel a Irecv[0] request\n"); fflush(stdout); } MPI_Wait(&req[1], &status); MPI_Test_cancelled(&status, &flag); if (flag) { errs++; printf("Incorrectly cancelled Irecv[1]\n"); fflush(stdout); } for (i = 0; i < 4; i++) { free(bufs[i]); } } MTest_Finalize(errs); MPI_Finalize(); return 0; }
int main (int argc, char *argv[]){ MPI_Init (&argc,&argv); MPI_Comm_size (MPI_COMM_WORLD,&numtasks); MPI_Comm_rank (MPI_COMM_WORLD,&rank); STEPS = 50; NX = 800; NY = 400; NZ = 30; x_partition_count = 2; y_partition_count = 1; z_partition_count = 1; for(i=1; i<argc; ++i) { if(!strcmp(argv[i], "s")) STEPS = atoi(argv[i+1]); else if(!strcmp(argv[i], "x")) NX = atoi(argv[i+1]); else if(!strcmp(argv[i],"y")) NY = atoi(argv[i+1]); else if(!strcmp(argv[i],"z")) NZ = atoi(argv[i+1]); else if(!strcmp(argv[i],"px")) x_partition_count = atoi(argv[i+1]); else if(!strcmp(argv[i],"py")) y_partition_count = atoi(argv[i+1]); else if(!strcmp(argv[i],"pz")) z_partition_count = atoi(argv[i+1]); } rank_mass = total_mass/(double)numtasks; if(rank == 0) { printf("\n\n==================================\n==========SESSION START===========\n==================================\n" "Program size (x/y/z/steps): %d x %d x %d x %d\n" "Partition grid (x/y/z): %d x %d x %d\n", NX, NY, NZ, STEPS, x_partition_count, y_partition_count, z_partition_count); fp = fopen("log.txt", "a"); fprintf(fp,"%dx%dx%dx%d\n%dx%dx%d\n%d processes\n\n", NX, NY, NZ, STEPS, x_partition_count, y_partition_count, z_partition_count, x_partition_count * y_partition_count * z_partition_count); if(!(NX && NY && NZ && x_partition_count && y_partition_count && z_partition_count)){ puts("Elements/Grid zero, cannot continue\n"\ "Use -x <number> to input x elements count\n"\ "Use -y <number> to input y elements count\n"\ "Use -z <number> to input z elements count\n"\ "Use -s <number> to input step count\n"\ "Use -px <number> to input x-dimension partition count\n"\ "Use -py <number> to input y-dimension partition count\n"\ "Use -pz <number> to input z-dimension partition count\n"\ ); //getchar(); return; } } #pragma omp parallel { if((rank==0) && (omp_get_thread_num() == 0)) printf("Internal element processing threads (OpenMP parallelization): %d\n", omp_get_num_threads()); } MPI_Barrier(MPI_COMM_WORLD); //for printf to apper in 
order // ================================START Data partition assignment================================ x_length = NX/x_partition_count; //Divide elements uniformly among partitions x_rank = rank % x_partition_count; //rank, as is "partition rank" x_start = x_rank * x_length ; //min x_end = (x_rank+1) * x_length - 1; y_length = NY/y_partition_count; y_rank = (rank / x_partition_count ) % y_partition_count; //rank, as is "partition rank" y_start = y_rank * y_length; //min y_end = (y_rank+1) * y_length - 1; z_length = NZ/z_partition_count; z_rank = rank / (x_partition_count*y_partition_count); z_start = z_rank * z_length; //min z_end = (z_rank+1) * z_length - 1; printf("Rank %d range: x(%d-%d) of %d, y(%d-%d) of %d, z(%d-%d) of %d\n", rank, x_start, x_end, NX, y_start, y_end, NY, z_start, z_end, NZ); //================================END Data partition assignment================================ //=====================================START Initialization==================================== duration_sendrecv = 0.0; duration_internal = 0.0; duration_busywait = 0.0; duration_external = 0.0; duration_mass_reduce= 0.0; duration_waitsend = 0.0; //Each of the arrays needs to have a size of (x+4)*(y +4)* z elements. //The size must be identical for all so that MPI datatype column will work correctly. //The +4 is the Halo zone for receives. 
u[0] = (struct element*) malloc((x_length+4)*(y_length+4)*z_length*sizeof(struct element)); u[1] = (struct element*) malloc((x_length+4)*(y_length+4)*z_length*sizeof(struct element)); for (iz=0; iz<z_length; ++iz) for (iy=0; iy<y_length+4; ++iy) for (ix=0; ix<x_length+4; ++ix){ (u[0]+c(ix,iy,iz))->mass = total_mass/numtasks/NX/NY/NZ; (u[0]+c(ix,iy,iz))->xy_value = (double)(rand() % 100); (u[0]+c(ix,iy,iz))->z_value = a*pow((u[0]+c(ix,iy,iz))->xy_value,10.0); (u[1]+c(ix,iy,iz))->mass = total_mass/numtasks/NX/NY/NZ; (u[1]+c(ix,iy,iz))->xy_value = 0.0; } //iz: Track which of the u arrays is the "old" iu = 0; //sprintf(filename,"atm%ds%d.txt", rank, it); //prtdat(filename); //printf("Rank %d saving in %s\n", rank, filename); // for printf to apper in order MPI_Barrier(MPI_COMM_WORLD); // DATATYPE: Notice how column size(1st arg) depends on partition size // an element consists of 3 doubles MPI_Type_vector (2, 3*x_length, 3*(x_length+4), MPI_DOUBLE, &xmargindata); MPI_Type_commit (&xmargindata); MPI_Type_vector (y_length, 6, 3*(x_length+4), MPI_DOUBLE, &ymargindata); MPI_Type_commit (&ymargindata); sizes[2] = 3*(x_length+4); sizes[1] = y_length+4; sizes[0] = z_length; subsizes_right_left[2] = 3*2; subsizes_right_left[1] = y_length; subsizes_right_left[0] = z_length; starts_right_left[0] = 0; starts_right_left[1] = 0; starts_right_left[2] = 0; MPI_Type_create_subarray(3, sizes, subsizes_right_left, starts_right_left, MPI_ORDER_C, MPI_DOUBLE, &right_left_type); MPI_Type_commit (&right_left_type); subsizes_down_up[2] = 3*x_length; subsizes_down_up[1] = 2; subsizes_down_up[0] = z_length; MPI_Type_create_subarray(3, sizes, subsizes_down_up, starts_right_left, MPI_ORDER_C, MPI_DOUBLE, &down_up_type); MPI_Type_commit (&down_up_type); printf("Rank %d has finished initialisation\n", rank); //==============================================END Initialization============================================== //Main Computation for (it = 1; it <= STEPS; ++it) { if(rank == 0) printf("Step 
%d\n", it); time_start_sendrecv = MPI_Wtime(); //printf("Rank %d starts iteration %d\n",rank,it); /* if(STEPS==1) printf("Rank %d neighbours: U %d D %d L %d R %d\n",rank, ((rank+x_partition_count) % (x_partition_count*y_partition_count) + (x_partition_count*y_partition_count)*(rank/x_partition_count/y_partition_count)), ((rank-x_partition_count+x_partition_count*y_partition_count) % (x_partition_count*y_partition_count) + (x_partition_count*y_partition_count)*(rank/x_partition_count/y_partition_count)), (rank+(rank % x_partition_count ? 0 : x_partition_count)-1), (rank+((rank+1) % x_partition_count ? 0 : -x_partition_count)+1));*/ MPI_Isend(u[iu]+c(2, y_length, 0), 1, down_up_type,\ (rank+x_partition_count) % (x_partition_count*y_partition_count) + (x_partition_count*y_partition_count)*(rank/x_partition_count/y_partition_count), DTUTAG, MPI_COMM_WORLD, req_send + 0); MPI_Irecv(u[iu]+c(2, y_length+2, 0), 1, down_up_type,\ (rank+x_partition_count) % (x_partition_count*y_partition_count) + (x_partition_count*y_partition_count)*(rank/x_partition_count/y_partition_count), UTDTAG, MPI_COMM_WORLD, req_recv + 0); MPI_Isend(u[iu]+c(2,2,0), 1, down_up_type,\ (rank-x_partition_count+x_partition_count*y_partition_count) % (x_partition_count*y_partition_count) + (x_partition_count*y_partition_count)*(rank/x_partition_count/y_partition_count), UTDTAG, MPI_COMM_WORLD, req_send + 1); MPI_Irecv(u[iu]+c(2,0,0), 1, down_up_type,\ (rank-x_partition_count+x_partition_count*y_partition_count) % (x_partition_count*y_partition_count) + (x_partition_count*y_partition_count)*(rank/x_partition_count/y_partition_count), DTUTAG, MPI_COMM_WORLD, req_recv+1); // use % due to spatial wraparound MPI_Isend(u[iu]+c(2,2,0), 1, right_left_type,\ rank+(rank % x_partition_count ? 0 : x_partition_count)-1, RTLTAG, MPI_COMM_WORLD, req_send+2); MPI_Irecv(u[iu]+c(0,2,0), 1, right_left_type,\ rank+(rank % x_partition_count ? 
0 : x_partition_count)-1, LTRTAG, MPI_COMM_WORLD, req_recv+2); MPI_Isend(u[iu]+c(x_length,2,0), 1, right_left_type,\ rank+((rank+1) % x_partition_count ? 0 : -x_partition_count)+1, LTRTAG, MPI_COMM_WORLD, req_send+3); MPI_Irecv(u[iu]+c(x_length+2,2,0), 1, right_left_type,\ rank+((rank+1) % x_partition_count ? 0 : -x_partition_count)+1, RTLTAG, MPI_COMM_WORLD, req_recv+3); //printf("Rank %d has finished nonblocking sendrecvs\n", rank); duration_sendrecv += MPI_Wtime() - time_start_sendrecv; //begin update of internal elements time_start_internal = MPI_Wtime(); #pragma omp parallel { #pragma omp for for(iz=0; iz<z_length; ++iz){ //full z range //printf("Iteration %d is assigned to thread %d\n", iz, omp_get_thread_num()); //disregard both the data waiting to be received (width 2 perimeter) and the ones //who need them to be calculated (another 2 width perimeter)(central elements) for(iy=4; iy<y_length; ++iy) for(ix=4; ix<x_length; ++ix) update(ix, iy, iz, u[iu], u[1-iu]); } } duration_internal += MPI_Wtime() - time_start_internal; // printf("Rank %d has finished internal elements\n", rank); // finished update of internal elements time_start_busywait = MPI_Wtime(); done_count = 0; memset(done, 0, 4*sizeof(int)); while(done_count<4){ for(i=0; i<4; ++i) if(!done[i]){ MPI_Test(req_recv+i, done+i, MPI_STATUS_IGNORE); if(done[i]){ switch(i){ case 0: for(iz=0; iz<z_length; ++iz) //full z range for(iy=y_length; iy<y_length+2; ++iy) for(ix=2; ix<x_length+2; ++ix) update(ix,iy,iz,u[iu],u[1-iu]);//update top row except corners break; case 1: for(iz=0; iz<z_length; ++iz) //full z range for(iy=2; iy<4; ++iy) for(ix=2; ix<x_length+2; ++ix) update(ix,iy,iz,u[iu],u[1-iu]);//update bottom row except corners break; case 2: for(iz=0; iz<z_length; ++iz) //full z range for(ix=2; ix<4; ++ix) for(iy=2; iy<y_length+2; ++iy) update(ix,iy,iz,u[iu],u[1-iu]);//update left column except corners break; case 3: for(iz=0;iz<z_length;iz++) //full z range for(ix=x_length;ix<x_length+2;ix++) 
for(iy=2;iy<y_length+2;iy++) update(ix,iy,iz,u[iu],u[1-iu]);//update right column except corners } ++done_count; }//end if(done[i]) }//end if(!done[i]). }//end while(done_count<4) //printf("Rank %d has finished busywait phase\n", rank); duration_busywait += MPI_Wtime() - time_start_busywait; time_start_external = MPI_Wtime(); for(iz=0; iz<z_length; ++iz) //full z range for(iy=2*y_length-2; iy<2*y_length+2; ++iy) for(ix=2*x_length-2; ix<2*x_length+2; ++ix) update(ix%x_length+2,iy%y_length+2,iz,u[iu],u[1-iu]);//update the four corners duration_external += MPI_Wtime() - time_start_external; time_start_mass_reduce = MPI_Wtime(); if(it % reduce_frequency == 0){ MPI_Reduce(&rank_mass, &mass_reduced, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(rank == 0) printf("Step %d: Rank %d reduced total mass of %f\n", it, rank, mass_reduced); } duration_mass_reduce += MPI_Wtime() - time_start_mass_reduce; time_start_waitsend = MPI_Wtime(); for(i=0; i<4; ++i) MPI_Wait(req_send+i, MPI_STATUS_IGNORE);//Wait for the sends //MPI_Barrier(MPI_COMM_WORLD); //printf("rank %d finished MPI_Waits at step %d\n", rank, it); //Revert arrays iu = 1-iu; duration_waitsend += MPI_Wtime() - time_start_waitsend; //sprintf(filename,"atm%ds%d.txt", rank, it); //prtdat(filename); }//end STEPS iteration MPI_Reduce(&duration_sendrecv ,&total_sendrecv ,1, MPI_DOUBLE,MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&duration_busywait ,&total_busywait ,1, MPI_DOUBLE,MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&duration_internal ,&total_internal ,1, MPI_DOUBLE,MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&duration_external ,&total_external ,1, MPI_DOUBLE,MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&duration_mass_reduce,&total_mass_reduce,1, MPI_DOUBLE,MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&duration_waitsend ,&total_waitsend ,1, MPI_DOUBLE,MPI_SUM, 0, MPI_COMM_WORLD); total_accounted_for = total_sendrecv + total_internal + total_external + total_busywait + total_mass_reduce + total_waitsend; if(!rank) printf("Time elapsed: %f 
seconds\n", total_accounted_for); if(!rank) printf("Durations:\nSend/Recv = %f\nInternal = %f\nBusywait = %f\nExternal = %f\nReduce mass = %f\nWait sends = %f\n\n" "Respective percentages (based on accounted for):\nSend/Recv = %f\nInternal = %f\nBusywait = %f\nExternal = %f\nReduce mass = %f\nWait sends = %f\n\n", total_sendrecv,total_internal,total_busywait,total_external,total_mass_reduce,total_waitsend, 100.0 * total_sendrecv /total_accounted_for, 100.0 * total_internal /total_accounted_for, 100.0 * total_busywait /total_accounted_for, 100.0 * total_external /total_accounted_for, 100.0 * total_mass_reduce/total_accounted_for, 100.0 * total_waitsend /total_accounted_for); if(rank==0) { fprintf(fp,"Total/Sendrecv/internal/busywait/external/mass reduce/waitsend durations:\n%f\t%f\t%f\t%f\t%f\t%f\t%f\n\n\n", total_accounted_for, total_sendrecv, total_internal, total_busywait, total_external, total_mass_reduce, total_waitsend); fclose(fp); } if(!rank) printf("\n\n==================================\n===========SESSION END============\n==================================\n\n\n"); MPI_Finalize(); return 0; }
int main (int argc, char *argv[]) { int my_rank, proc_num; MPI_Status status; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &proc_num); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); double diff; /* change in value */ int i, j, m, n; int N=DEFAULT_N; double epsilon=0.01; double mean; FILE *fp; /* Argument processing */ int edgeElems = DEFAULT_ELEM; /* edge elements */ int cfreq = DEFAULT_FREQ; /* checkpoint frequency */ char *cpath = DEFAULT_PATH; /* checkpoint path */ int nok = 0; /* arguments not OK */ int pinit=1; char *s; while (--argc > 0 && (*++argv)[0] == '-') { for(s=argv[0]+1;*s;s++) switch (*s) { case 'd': if (isdigit(s[1])) edgeElems = atoi(s+1); else nok = 1; s+=strlen(s+1); break; case 'c': if (isdigit(s[i])) cfreq = atoi(s+1); else nok = 1; s+=strlen(s+1); break; case 'p': cpath = s+1; s+=strlen(s+1); break; case 'r': pinit = 0; break; case 'n': if (isdigit(s[1])) N = atoi(s+1); else nok = 1; s+=strlen(s+1); break; case 'e': if (isdigit(s[1])) epsilon = atof(s+1); else nok = 1; s+=strlen(s+1); break; default: nok = 1; break; } } if (nok) { fprintf(stderr, "Usage: %s -e<int> -c<int> -p<str> -r -n<int> -epsilon<double>\n", argv[0]); fprintf(stderr, " -d edge elements, default: %d\n", DEFAULT_ELEM); fprintf(stderr, " -c checkpoint frequency, default: %d\n", DEFAULT_FREQ); fprintf(stderr, " -p path to checkpoint file, default: %s\n", DEFAULT_PATH); fprintf(stderr, " -r restore\n"); fprintf(stderr, " -n size of n, default:1000\n"); fprintf(stderr, " -e epsilon, default:0.01\n"); exit(EXIT_FAILURE); } #ifdef DEBUG if(my_rank==0) printf("n=%d, epsilon=%lf\n", N, epsilon); #endif if(N>1000){ printf("Too big value for N, use no more than 1000, or change DEFAULT_N\n"); return 0; } // Persistent memory initialization const char *mode = (pinit) ? 
"w+" : "r+"; char back_fname[128]; char my_rank_str[4]; perm(PERM_START, PERM_SIZE); strcpy(back_fname, cpath); strcat(back_fname,"hw5_mpi.back."); sprintf(my_rank_str, "%d", my_rank); strcat(back_fname,my_rank_str); // printf("mopen: %s\n", back_fname); mopen(back_fname, mode, MMAP_SIZE); strcpy(back_fname, cpath); strcat(back_fname,"hw5_mpi.mmap."); strcat(back_fname,my_rank_str); // printf("bopen: %s\n", back_fname); bopen(back_fname, mode); if (!pinit){ restore(); printf("Resotored, iter=%d, myN=%d\n", iter, myN); } else{ iter = 0; /* Set boundary values and compute mean boundary value */ mean = 0.0; for (i=0; i<N; i++) { u[i][0] = u[i][N-1] = u[0][i] = 100.0; u[N-1][i] = 0.0; mean += u[i][0] + u[i][N-1] + u[0][i] + u[N-1][i]; } mean /= (4.0 *N); /* Initialize interior values */ for (i =1; i<N-1; i++) for (j=1; j<N-1; j++) u[i][j] = mean; // distribute data myN = N / proc_num; if(N%proc_num!=0){ if(my_rank==proc_num-1) myN=N-(proc_num-1)*myN; } if(proc_num > 1) { // ghost rows if(my_rank == 0 || my_rank == proc_num - 1) myN++; else myN += 2; } // initial value for(i = 0; i < myN; i++) { for(j = 0; j < N; j++) { if(my_rank == 0) myu[i][j] = u[i][j]; else myu[i][j] = u[my_rank*(N/proc_num)-1+i][j]; myw[i][j]=myu[i][j]; } } mflush(); backup(); } struct timeval start_tv, end_tv; gettimeofday(&start_tv, NULL); double alldiff=0; int left = my_rank - 1; int right = my_rank +1; MPI_Request send_req1, recv_req1; MPI_Request send_req2, recv_req2; while(1) { iter++; diff = 0.0; #pragma omp parallel for schedule(static) default(shared) private(i,j) for (i=1; i<myN-1; i++) { for (j=1; j<N-1; j++) { myw[i][j] = (myu[i-1][j] + myu[i+1][j] + myu[i][j-1] + myu[i][j+1])/4.0; #pragma critical if (fabs (myw[i][j] - myu[i][j]) > diff) diff = fabs(myw[i][j] - myu[i][j]); } } // reduce diff MPI_Allreduce(&diff, &alldiff, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); #ifdef PRINTITER if(my_rank==0){ printf("iter=%d, diff=%lf\n", iter, alldiff); fflush(stdout); } #endif if (alldiff <= 
epsilon) break; if(proc_num > 1) { // send second top row if(my_rank != 0){ MPI_Isend(myw[1], N, MPI_DOUBLE, left, 0, MPI_COMM_WORLD, &send_req1); //printf("Send: %d->%d\n", my_rank, left); } // send second to bottom row if(my_rank != proc_num - 1){ MPI_Isend(myw[myN-2], N, MPI_DOUBLE, right, 1, MPI_COMM_WORLD, &send_req2); //printf("Send %d->%d\n", my_rank, right); } // recive top if(my_rank != 0){ MPI_Irecv(myw[0], N, MPI_DOUBLE, left, 1, MPI_COMM_WORLD, &recv_req1); //printf("Recv: %d->%d\n", my_rank, left); } // receive bottom if(my_rank != proc_num - 1) { MPI_Irecv(myw[myN-1], N, MPI_DOUBLE, right, 0, MPI_COMM_WORLD, &recv_req2); //printf("Recv %d->%d\n", my_rank, right); } if(my_rank != 0) MPI_Wait(&send_req1, &status); if(my_rank != proc_num - 1) MPI_Wait(&send_req2, &status); if(my_rank != 0) MPI_Wait(&recv_req1, &status); if(my_rank != proc_num - 1) MPI_Wait(&recv_req2, &status); } #pragma omp parallel for schedule(static) default(shared) private(i,j) for (i=0; i<myN; i++) { if( (i==0&&my_rank==0) ||(i==myN-1&&my_rank==proc_num-1)) continue; for (j=1; j<N-1; j++) myu[i][j] = myw[i][j]; } // backup if(iter%cfreq == 0) backup(); } gettimeofday(&end_tv, NULL); printf("Elapsed time: %f sec\n", (double)( (double)(end_tv.tv_sec - start_tv.tv_sec) + ( (double)(end_tv.tv_usec - start_tv.tv_usec)/1000000)) ); // gather data if(my_rank==0) { for (i=0; i<myN; i++) { for(j=0; j<N; j++) { u[i][j] = myu[i][j]; } } if(proc_num > 1) { for (i=1; i<proc_num-1; i++) MPI_Recv(u[i*(N/proc_num)], (N/proc_num)*N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status); // special care for last one if(N%proc_num==0) MPI_Recv(u[i*(N/proc_num)], (N/proc_num)*N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status); else{ MPI_Recv(u[i*(N/proc_num)], (N-(N/proc_num)*(proc_num-1))*N, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &status); } } } else { if(N%proc_num==0) MPI_Send(myu[1], (N/proc_num)*N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); else{ if(my_rank != proc_num-1) MPI_Send(myu[1], (myN-2)*N, MPI_DOUBLE, 0, 0, 
MPI_COMM_WORLD); else MPI_Send(myu[1], (myN-1)*N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); } } if(my_rank == 0) { /* Print Solution */ fp = fopen("output.dat", "w"); for (i=0; i<N; i++) { for (j=0; j<N; j++) { fprintf(fp, "%6.2f ", u[i][j]); } fprintf(fp, "\n"); } fclose(fp); } mclose(); bclose(); MPI_Finalize(); return 0; }
void wet::exchangeDensities_ffgg(void) { //if(t%infoStep==0) // cout << "Process " << rank << ": exchanging densities...." << endl; MPI_Status statusff1, statusff2, statusffa, statusffb, statusffc, statusffd, statusffi, statusffj, statusffk, statusffl, statusgg1, statusgg2, statusgga, statusggb, statusggc, statusggd, statusggi, statusggj, statusggk, statusggl; MPI_Request requestOutff1, requestOutff2, requestInff1, requestInff2, requestOutffa, requestOutffb, requestInffa, requestInffb, requestOutffc, requestOutffd, requestInffc, requestInffd, requestOutffi, requestOutffj, requestInffi, requestInffj, requestOutffk, requestOutffl, requestInffk, requestInffl,requestOutgg1, requestOutgg2, requestIngg1, requestIngg2, requestOutgga, requestOutggb, requestIngga, requestInggb, requestOutggc, requestOutggd, requestInggc, requestInggd, requestOutggi, requestOutggj, requestInggi, requestInggj, requestOutggk, requestOutggl, requestInggk, requestInggl; //SENDING FF DENSITIES //Sending right (for sender) ff1 (only right part, because ff1 moves in x direction) MPI_Isend(&(ff1[k2-k1]),k1, MPI_DOUBLE, rightProcess, rank*100 , MPI_COMM_WORLD, &requestOutff1); //Sending left (for sender) ff2 (only left part, because ff2 moves in -x direction) MPI_Isend(&(ff2[k1]),k1, MPI_DOUBLE, leftProcess, rank*100+1 , MPI_COMM_WORLD, &requestOutff2); //Sending right (for sender) ffa (only right part, because ffa moves in x direction) MPI_Isend(&(ffa[k2-k1]),k1, MPI_DOUBLE, rightProcess, rank*100+2 , MPI_COMM_WORLD, &requestOutffa); //Sending left (for sender) ffb (only left part, because ffb moves in -x direction) MPI_Isend(&(ffb[k1]),k1, MPI_DOUBLE, leftProcess, rank*100+3 , MPI_COMM_WORLD, &requestOutffb); //Sending right (for sender) ffc (only right part, because ffc moves in x direction) MPI_Isend(&(ffc[k2-k1]),k1, MPI_DOUBLE, rightProcess, rank*100+4 , MPI_COMM_WORLD, &requestOutffc); //Sending left (for sender) ffd (only left part, because ffd moves in -x direction) MPI_Isend(&(ffd[k1]),k1, 
MPI_DOUBLE, leftProcess, rank*100+5 , MPI_COMM_WORLD, &requestOutffd); //Sending right (for sender) ffi (only right part, because ffi moves in x direction) MPI_Isend(&(ffi[k2-k1]),k1, MPI_DOUBLE, rightProcess, rank*100+6 , MPI_COMM_WORLD, &requestOutffi); //Sending left (for sender) ffj (only left part, because ffj moves in -x direction) MPI_Isend(&(ffj[k1]),k1, MPI_DOUBLE, leftProcess, rank*100+7 , MPI_COMM_WORLD, &requestOutffj); //Sending right (for sender) ffk (only right part, because ffk moves in x direction) MPI_Isend(&(ffk[k2-k1]),k1, MPI_DOUBLE, rightProcess, rank*100+8 , MPI_COMM_WORLD, &requestOutffk); //Sending left (for sender) ffl (only left part, because ffl moves in -x direction) MPI_Isend(&(ffl[k1]),k1, MPI_DOUBLE, leftProcess, rank*100+9 , MPI_COMM_WORLD, &requestOutffl); //RECIEVING FF DENSITIES //Recieving left (for reciever) ff1 (only left part, because ff1 moves in x direction) MPI_Irecv(ff1, k1, MPI_DOUBLE, leftProcess, leftProcess*100, MPI_COMM_WORLD, &requestInff1); //Recieving right (for reciever) ff2 (only right part, because ff2 moves in -x direction) MPI_Irecv(&(ff2[k2]), k1, MPI_DOUBLE, rightProcess, rightProcess*100+1, MPI_COMM_WORLD, &requestInff2); //Recieving left (for reciever) ffa (only left part, because ffa moves in x direction) MPI_Irecv(ffa, k1, MPI_DOUBLE, leftProcess, leftProcess*100+2, MPI_COMM_WORLD, &requestInffa); //Recieving right (for reciever) ffb (only right part, because ffb moves in -x direction) MPI_Irecv(&(ffb[k2]), k1, MPI_DOUBLE, rightProcess, rightProcess*100+3, MPI_COMM_WORLD, &requestInffb); //Recieving left (for reciever) ff1 (only left part, because ffc moves in x direction) MPI_Irecv(ffc, k1, MPI_DOUBLE, leftProcess, leftProcess*100+4, MPI_COMM_WORLD, &requestInffc); //Recieving right (for reciever) ff2 (only right part, because ffd moves in -x direction) MPI_Irecv(&(ffd[k2]), k1, MPI_DOUBLE, rightProcess, rightProcess*100+5, MPI_COMM_WORLD, &requestInffd); //Recieving left (for reciever) ffa (only left 
part, because ffi moves in x direction) MPI_Irecv(ffi, k1, MPI_DOUBLE, leftProcess, leftProcess*100+6, MPI_COMM_WORLD, &requestInffi); //Recieving right (for reciever) ffb (only right part, because ffj moves in -x direction) MPI_Irecv(&(ffj[k2]), k1, MPI_DOUBLE, rightProcess, rightProcess*100+7, MPI_COMM_WORLD, &requestInffj); //Recieving left (for reciever) ffk (only left part, because ffk moves in x direction) MPI_Irecv(ffk, k1, MPI_DOUBLE, leftProcess, leftProcess*100+8, MPI_COMM_WORLD, &requestInffk); //Recieving right (for reciever) ffl (only right part, because ffl moves in -x direction) MPI_Irecv(&(ffl[k2]), k1, MPI_DOUBLE, rightProcess, rightProcess*100+9, MPI_COMM_WORLD, &requestInffl); //SENDING GG DENSITIES //Sending right (for sender) gg1 (only right part, because gg1 moves in x direction) MPI_Isend(&(gg1[k2-k1]),k1, MPI_DOUBLE, rightProcess, rank*100+10 , MPI_COMM_WORLD, &requestOutgg1); //Sending left (for sender) gg2 (only left part, because gg2 moves in -x direction) MPI_Isend(&(gg2[k1]),k1, MPI_DOUBLE, leftProcess, rank*100+11 , MPI_COMM_WORLD, &requestOutgg2); //Sending right (for sender) gga (only right part, because gga moves in x direction) MPI_Isend(&(gga[k2-k1]),k1, MPI_DOUBLE, rightProcess, rank*100+12 , MPI_COMM_WORLD, &requestOutgga); //Sending left (for sender) ggb (only left part, because ggb moves in -x direction) MPI_Isend(&(ggb[k1]),k1, MPI_DOUBLE, leftProcess, rank*100+13 , MPI_COMM_WORLD, &requestOutggb); //Sending right (for sender) ggc (only right part, because ggc moves in x direction) MPI_Isend(&(ggc[k2-k1]),k1, MPI_DOUBLE, rightProcess, rank*100+14 , MPI_COMM_WORLD, &requestOutggc); //Sending left (for sender) ggd (only left part, because ggd moves in -x direction) MPI_Isend(&(ggd[k1]),k1, MPI_DOUBLE, leftProcess, rank*100+15, MPI_COMM_WORLD, &requestOutggd); //Sending right (for sender) ggi (only right part, because ggi moves in x direction) MPI_Isend(&(ggi[k2-k1]),k1, MPI_DOUBLE, rightProcess, rank*100+16 , MPI_COMM_WORLD, 
&requestOutggi); //Sending left (for sender) ggj (only left part, because ggj moves in -x direction) MPI_Isend(&(ggj[k1]),k1, MPI_DOUBLE, leftProcess, rank*100+17 , MPI_COMM_WORLD, &requestOutggj); //Sending right (for sender) ggk (only right part, because ggk moves in x direction) MPI_Isend(&(ggk[k2-k1]),k1, MPI_DOUBLE, rightProcess, rank*100+18 , MPI_COMM_WORLD, &requestOutggk); //Sending left (for sender) ggl (only left part, because ggl moves in -x direction) MPI_Isend(&(ggl[k1]),k1, MPI_DOUBLE, leftProcess, rank*100+19 , MPI_COMM_WORLD, &requestOutggl); //RECIEVING GG DENSITIES //Recieving left (for reciever) gg1 (only left part, because gg1 moves in x direction) MPI_Irecv(gg1, k1, MPI_DOUBLE, leftProcess, leftProcess*100+10, MPI_COMM_WORLD, &requestIngg1); //Recieving right (for reciever) gg2 (only right part, because gg2 moves in -x direction) MPI_Irecv(&(gg2[k2]), k1, MPI_DOUBLE, rightProcess, rightProcess*100+11, MPI_COMM_WORLD, &requestIngg2); //Recieving left (for reciever) gga (only left part, because gga moves in x direction) MPI_Irecv(gga, k1, MPI_DOUBLE, leftProcess, leftProcess*100+12, MPI_COMM_WORLD, &requestIngga); //Recieving right (for reciever) ggb (only right part, because ggb moves in -x direction) MPI_Irecv(&(ggb[k2]), k1, MPI_DOUBLE, rightProcess, rightProcess*100+13, MPI_COMM_WORLD, &requestInggb); //Recieving left (for reciever) gg1 (only left part, because ggc moves in x direction) MPI_Irecv(ggc, k1, MPI_DOUBLE, leftProcess, leftProcess*100+14, MPI_COMM_WORLD, &requestInggc); //Recieving right (for reciever) gg2 (only right part, because ggd moves in -x direction) MPI_Irecv(&(ggd[k2]), k1, MPI_DOUBLE, rightProcess, rightProcess*100+15, MPI_COMM_WORLD, &requestInggd); //Recieving left (for reciever) gga (only left part, because ggi moves in x direction) MPI_Irecv(ggi, k1, MPI_DOUBLE, leftProcess, leftProcess*100+16, MPI_COMM_WORLD, &requestInggi); //Recieving right (for reciever) ggb (only right part, because ggj moves in -x direction) 
MPI_Irecv(&(ggj[k2]), k1, MPI_DOUBLE, rightProcess, rightProcess*100+17, MPI_COMM_WORLD, &requestInggj); //Recieving left (for reciever) ggk (only left part, because ggk moves in x direction) MPI_Irecv(ggk, k1, MPI_DOUBLE, leftProcess, leftProcess*100+18, MPI_COMM_WORLD, &requestInggk); //Recieving right (for reciever) ggl (only right part, because ggl moves in -x direction) MPI_Irecv(&(ggl[k2]), k1, MPI_DOUBLE, rightProcess, rightProcess*100+19, MPI_COMM_WORLD, &requestInggl); //Waiting until ff(i) densities are saved in the recieving buffer MPI_Wait(&requestInff1, &statusff1); MPI_Wait(&requestInff2, &statusff2); MPI_Wait(&requestInffa, &statusffa); MPI_Wait(&requestInffb, &statusffb); MPI_Wait(&requestInffc, &statusffc); MPI_Wait(&requestInffd, &statusffd); MPI_Wait(&requestInffi, &statusffi); MPI_Wait(&requestInffj, &statusffj); MPI_Wait(&requestInffk, &statusffk); MPI_Wait(&requestInffl, &statusffl); //Waiting until gg(i) densities are saved in the recieving buffer MPI_Wait(&requestIngg1, &statusgg1); MPI_Wait(&requestIngg2, &statusgg2); MPI_Wait(&requestIngga, &statusgga); MPI_Wait(&requestInggb, &statusggb); MPI_Wait(&requestInggc, &statusggc); MPI_Wait(&requestInggd, &statusggd); MPI_Wait(&requestInggi, &statusggi); MPI_Wait(&requestInggj, &statusggj); MPI_Wait(&requestInggk, &statusggk); MPI_Wait(&requestInggl, &statusggl); //Waiting until ff(i) sending buffer is relased MPI_Wait(&requestOutff1, &statusff1); MPI_Wait(&requestOutff2, &statusff2); MPI_Wait(&requestOutffa, &statusffa); MPI_Wait(&requestOutffb, &statusffb); MPI_Wait(&requestOutffc, &statusffc); MPI_Wait(&requestOutffd, &statusffd); MPI_Wait(&requestOutffi, &statusffi); MPI_Wait(&requestOutffj, &statusffj); MPI_Wait(&requestOutffk, &statusffk); MPI_Wait(&requestOutffl, &statusffl); //Waiting until gg(i) sending buffer is relased MPI_Wait(&requestOutgg1, &statusgg1); MPI_Wait(&requestOutgg2, &statusgg2); MPI_Wait(&requestOutgga, &statusgga); MPI_Wait(&requestOutggb, &statusggb); 
MPI_Wait(&requestOutggc, &statusggc); MPI_Wait(&requestOutggd, &statusggd); MPI_Wait(&requestOutggi, &statusggi); MPI_Wait(&requestOutggj, &statusggj); MPI_Wait(&requestOutggk, &statusggk); MPI_Wait(&requestOutggl, &statusggl); MPI_Barrier(MPI_COMM_WORLD); //if(t%infoStep==0) // cout << "Process " << rank << ": dendities exchanged." << endl; }
void do_master_stuff(int argc, char ** argv, struct mw_api_spec *f) { DEBUG_PRINT(("master starting")); int number_of_slaves; MPI_Comm_size(MPI_COMM_WORLD, &number_of_slaves); LinkedList * work_list; double start, end, start_create, end_create, start_results, end_results; start = MPI_Wtime(); DEBUG_PRINT(("creating work list...")); start_create = MPI_Wtime(); work_list = listFromArray(f->create(argc, argv)); end_create = MPI_Wtime(); DEBUG_PRINT(("created work in %f seconds!", end_create - start_create)); int slave=1, num_work_units=0; num_work_units = list_length(work_list); mw_result_t * received_results = malloc(f->res_sz * num_work_units); if (received_results == NULL) { fprintf(stderr, "ERROR: insufficient memory to allocate received_results\n"); exit(0); } int num_results_received = 0; // make array keeping track of pointers for work that's active LinkedList* assignment_ptrs[number_of_slaves-2]; // make array of binary indicators for inactive workers // initially all workers are active and 0 //unsigned int inactive_workers[number_of_slaves-2]; // create array of start times double assignment_time[number_of_slaves-2]; int are_you_down[number_of_slaves-2]; // current pointer LinkedList * next_work_node = work_list, * list_end = NULL; // have supervisor so starting at 2 for(slave=2; slave<number_of_slaves; ++slave) { are_you_down[slave-2] = 0; //slaves are all working in the beginning DEBUG_PRINT(("assigning work to slave")); if(next_work_node == NULL) { DEBUG_PRINT(("reached the end of the work, breaking!")); break; } mw_work_t * work_unit = next_work_node->data; send_to_slave(work_unit, f->work_sz, MPI_CHAR, slave, WORK_TAG, MPI_COMM_WORLD); // save next_work_node to assigned work assignment_ptrs[slave-2] = next_work_node; assert(assignment_ptrs[slave-2] != NULL); // save start time assignment_time[slave-2] = MPI_Wtime(); // update next_work_node if(next_work_node->next == NULL) { list_end = next_work_node; } next_work_node=next_work_node->next; 
DEBUG_PRINT(("work sent to slave")); } // send time array to supervisor DEBUG_PRINT(("Sending supervisor first time update")); MPI_Send(assignment_time, number_of_slaves-2, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD); // failure id int failure_id; MPI_Status status_fail, status_res; MPI_Request request_fail, request_res; int flag_fail = 0, flag_res = 0; // receive failure from supervisor as non-blocking recv MPI_Irecv(&failure_id, 1, MPI_INT, 1, FAIL_TAG, MPI_COMM_WORLD, &request_fail); // receive result from workers as non-blocking recv MPI_Irecv(&received_results[num_results_received], f->res_sz, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &request_res); // send units of work while haven't received all results while(num_results_received < num_work_units) { // check for flag_fail again MPI_Test(&request_fail, &flag_fail, &status_fail); // check for flag_res again MPI_Test(&request_res, &flag_res, &status_res); // send work if have failures or got results if (flag_fail) { // change inactive workers array //inactive_workers[status_fail.MPI_SOURCE-2] = 1; DEBUG_PRINT(("received failure from supervisor, process %d", failure_id)); // get work_unit that needs to be reassigned LinkedList * work_unit = assignment_ptrs[failure_id]; if(work_unit != NULL) { DEBUG_PRINT(("Moving assignment at %p to end of the queue", work_unit)); move_node_to_end(work_unit); if(next_work_node == NULL) { next_work_node = work_unit; } assert(next_work_node != NULL); } if(assignment_time[failure_id] == 0.0) { DEBUG_PRINT(("Failure on idle process %d. WTF??", failure_id)); } if(are_you_down[failure_id] == 1) { DEBUG_PRINT(("Failure on a process which is already failed. 
WTF??")); } are_you_down[failure_id] = 1; //this slave is considered dead :( assignment_ptrs[failure_id] = NULL; assignment_time[failure_id] = 0.0; MPI_Send(assignment_time, number_of_slaves-2, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD); flag_fail = 0; // continue to receive failures from supervisor as non-blocking recv MPI_Irecv(&failure_id, 1, MPI_INT, 1, FAIL_TAG, MPI_COMM_WORLD, &request_fail); } int idle_process = -1, i; for(i=0; i<number_of_slaves-2; ++i) { if(assignment_time[i] == 0.0 && !are_you_down[i]) { idle_process = i; break; } } if(next_work_node != NULL && idle_process > -1) { send_to_slave(next_work_node->data, f->work_sz, MPI_CHAR, idle_process+2, WORK_TAG, MPI_COMM_WORLD); assignment_ptrs[idle_process] = next_work_node; assignment_time[idle_process] = MPI_Wtime(); MPI_Send(assignment_time, number_of_slaves-2, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD); DEBUG_PRINT(("Gave an assignment to previously idle process %d, assignment at %p", idle_process, next_work_node)); if(next_work_node->next == NULL) { list_end = next_work_node; } next_work_node = next_work_node->next; } if (flag_res) { int worker_number = status_res.MPI_SOURCE-2; if(!are_you_down[worker_number]) //If this slave is marked dead, just ignore him { // update number of results received num_results_received++; if(next_work_node == NULL && list_end != NULL && list_end->next != NULL) { DEBUG_PRINT(("Found more work to do, now an idle process can get an assignment")); next_work_node = list_end->next; list_end = NULL; } if(next_work_node != NULL) { // get work_unit mw_work_t* work_unit = next_work_node->data; // send new unit of work send_to_slave(work_unit, f->work_sz, MPI_CHAR, status_res.MPI_SOURCE, WORK_TAG, MPI_COMM_WORLD); // update pointer if(next_work_node->next == NULL) { list_end = next_work_node; } // update work index for new_pid assignment_ptrs[status_res.MPI_SOURCE-2] = next_work_node; assert(assignment_ptrs[status_res.MPI_SOURCE-2] != NULL); 
assignment_time[status_res.MPI_SOURCE-2] = MPI_Wtime(); // send updated array of times to supervisor MPI_Send(assignment_time, number_of_slaves-2, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD); DEBUG_PRINT(("SENT TIME TO SUP")); next_work_node = next_work_node->next; if(next_work_node == NULL) { DEBUG_PRINT(("Reached the end of the work list, should get idle processors after this")); } } else { DEBUG_PRINT(("Worker %d is now idle, I ain't got shit for him to do", worker_number)); assignment_time[worker_number] = 0.0; assignment_ptrs[worker_number] = NULL; assert(!are_you_down[worker_number]); MPI_Send(assignment_time, number_of_slaves-2, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD); } } // continue to receive results from workers as non-blocking recv MPI_Irecv(&received_results[num_results_received], f->res_sz, MPI_CHAR, MPI_ANY_SOURCE, WORK_TAG, MPI_COMM_WORLD, &request_res); } } // send kill signal to other processes, including supervisor for(slave=1; slave<number_of_slaves; ++slave) { DEBUG_PRINT(("Murdering slave")); kill_slave(slave); } start_results = MPI_Wtime(); int err_code = f->result(num_results_received, received_results); end_results = MPI_Wtime(); end = MPI_Wtime(); DEBUG_PRINT(("all %f s\n", end-start)); DEBUG_PRINT(("create %f s\n", end_create-start_create)); DEBUG_PRINT(("process %f s\n", end_results-start_results)); }