static void esync_master(VT_MPI_INT slave, MPI_Comm comm, VT_MPI_INT masterid)
{
  int i;
  uint64_t tsend, trecv, tslave;
  uint64_t t1, t2, t3, t4;
  MPI_Status stat;
  MPI_Request req;
  Sync_TsPerPhase* temp;

  /* exchange LOOP_COUNT ping-pong messages with the communication partner */

  t1 = vt_pform_wtime();
  PMPI_Isend(&t1, 1, MPI_LONG_LONG_INT, slave, 0, comm, &req);
  PMPI_Recv(&t2, 1, MPI_LONG_LONG_INT, slave, 0, comm, &stat);
  t4 = vt_pform_wtime();
  t3 = t2;
  PMPI_Waitall(1, &req, &stat);

  for (i = 1; i < LOOP_COUNT; i++)
  {
    tsend = vt_pform_wtime();

    /* message exchange */
    PMPI_Isend(&tsend, 1, MPI_LONG_LONG_INT, slave, i, comm, &req);
    PMPI_Recv(&tslave, 1, MPI_LONG_LONG_INT, slave, i, comm, &stat);
    trecv = vt_pform_wtime();
    PMPI_Waitall(1, &req, &stat);

    /* select timestamps with minimum message delay in each direction */
    if (((int64_t)tslave - (int64_t)tsend) < ((int64_t)t2 - (int64_t)t1))
    {
      t1 = tsend;
      t2 = tslave;
    }
    if (((int64_t)trecv - (int64_t)tslave) < ((int64_t)t4 - (int64_t)t3))
    {
      t3 = tslave;
      t4 = trecv;
    }
  }

  /* save synchronization measurement data into internal data structure */
  temp = (Sync_TsPerPhase*)malloc(sizeof(Sync_TsPerPhase));
  if (!temp) vt_error();
  temp->id1  = masterid;
  temp->id2  = slave;
  temp->t1   = t1;
  temp->t2   = t2;
  temp->t3   = t3;
  temp->t4   = t4;
  temp->next = SyncTsPerRunLast->sync_phase;
  SyncTsPerRunLast->sync_phase = temp;
}
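The four timestamps selected above are the inputs to the usual ping-pong (NTP-style) offset estimate. A minimal sketch of that computation, as a hypothetical helper that is not part of this file (how the tool actually combines the timestamps downstream is not shown here):

#include <stdint.h>

/* Hypothetical helper: estimate the slave-minus-master clock offset from one
 * synchronization phase. t1/t4 are the master timestamps around the fastest
 * forward and return messages; t2/t3 are the slave timestamps they carried.
 * Averaging the two one-way differences cancels symmetric network delay. */
static int64_t estimate_offset(uint64_t t1, uint64_t t2, uint64_t t3, uint64_t t4)
{
    return (((int64_t)t2 - (int64_t)t1) + ((int64_t)t3 - (int64_t)t4)) / 2;
}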
int MPI_Isend(MPE_CONST void *buf, int count, MPI_Datatype datatype, int dest,
              int tag, MPI_Comm comm, MPI_Request *request)
{
  int returnVal;
  request_list *newrq;
  int typesize3;

  /* fprintf( stderr, "MPI_Isend call on %d\n", procid_1 ); */

  returnVal = PMPI_Isend(buf, count, datatype, dest, tag, comm, request);

  if (dest != MPI_PROC_NULL) {
    rq_alloc(requests_avail_1, newrq);
    if (newrq) {
      PMPI_Type_size(datatype, &typesize3);
      newrq->request    = *request;
      newrq->status     = RQ_SEND;
      newrq->size       = count * typesize3;
      newrq->tag        = tag;
      newrq->otherParty = dest;
      newrq->next       = 0;
      rq_add(requests_head_1, requests_tail_1, newrq);
    }
  }

  return returnVal;
}
int MPI_Isend(const void* buffer, int count, MPI_Datatype datatype, int dst,
              int tag, MPI_Comm comm, MPI_Request* request)
{
    cqueue_t* mycqueue = handle_get_cqueue(comm);

    if (mycqueue != NULL)
        return cqueue_isend(mycqueue, buffer, count, datatype, dst, tag, comm, request);
    else {
        if (std_mpi_mode == STD_MPI_MODE_IMPLICIT && max_ep > 0)
            return cqueue_isend(client_get_cqueue((taskid + dst) % max_ep),
                                buffer, count, datatype, dst, tag, comm, request);
        return PMPI_Isend(buffer, count, datatype, dst, tag, comm, request);
    }
}
static int MTCORE_Send_pscw_post_msg(int post_grp_size, MTCORE_Win * uh_win)
{
    int mpi_errno = MPI_SUCCESS;
    int i, user_rank;
    char post_flg = 1;
    MPI_Request *reqs = NULL;
    MPI_Status *stats = NULL;
    int remote_cnt = 0;

    reqs = calloc(post_grp_size, sizeof(MPI_Request));
    stats = calloc(post_grp_size, sizeof(MPI_Status));

    PMPI_Comm_rank(uh_win->user_comm, &user_rank);

    for (i = 0; i < post_grp_size; i++) {
        int origin_rank = uh_win->post_ranks_in_win_group[i];

        /* Do not send to local target, otherwise it may deadlock.
         * We do not check the wrong sync case that user calls start(self)
         * before post(self). */
        if (user_rank == origin_rank)
            continue;

        mpi_errno = PMPI_Isend(&post_flg, 1, MPI_CHAR, origin_rank,
                               MTCORE_PSCW_PS_TAG, uh_win->user_comm, &reqs[remote_cnt++]);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;

        /* Set post flag to true on the main helper of post origin. */
        MTCORE_DBG_PRINT("send pscw post msg to origin %d \n", origin_rank);
    }

    /* Has to blocking wait here to poll progress. */
    mpi_errno = PMPI_Waitall(remote_cnt, reqs, stats);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

  fn_exit:
    if (reqs)
        free(reqs);
    if (stats)
        free(stats);
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}
int MPI_Isend(const void *buf, int count, MPI_Datatype type, int dest, int tag,
              MPI_Comm comm, MPI_Request *request)
{
    char typename[MPI_MAX_OBJECT_NAME], commname[MPI_MAX_OBJECT_NAME];
    int len;
    int rank;

    PMPI_Comm_rank(MPI_COMM_WORLD, &rank);
    PMPI_Type_get_name(type, typename, &len);
    PMPI_Comm_get_name(comm, commname, &len);

    fprintf(stderr, "MPI_ISEND[%d]: buf %0" PRIxPTR " count %d datatype %s dest %d tag %d comm %s\n",
            rank, (uintptr_t) buf, count, typename, dest, tag, commname);
    fflush(stderr);

    return PMPI_Isend(buf, count, type, dest, tag, comm, request);
}
int MPI_Send_Nospin( void *buff, const int count, MPI_Datatype datatype,
                     const int dest, const int tag, MPI_Comm comm )
{
    // Start the send without blocking, then poll for completion while sleeping
    // instead of busy-waiting inside MPI_Send.
    MPI_Request req;
    PMPI_Isend( buff, count, datatype, dest, tag, comm, &req );

    MPI_Status status;
    timespec ts{ 0, nsec_start };
    int flag = 0;
    while ( !flag )
    {
        // Exponential backoff: double the sleep interval, capped at nsec_max.
        nanosleep( &ts, nullptr );
        ts.tv_nsec = std::min( size_t(ts.tv_nsec << 1), nsec_max );
        PMPI_Request_get_status( req, &flag, &status );
    }
    // Request_get_status does not deallocate the request; this wait returns
    // immediately (flag is already set) and releases it.
    PMPI_Wait( &req, MPI_STATUS_IGNORE );
    return status.MPI_ERROR;
}
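A minimal usage sketch for the polling wrapper above. The two-rank exchange, tag, and payload are illustrative; nsec_start and nsec_max are assumed to be defined alongside the wrapper:

#include <mpi.h>

/* Illustrative driver: rank 0 uses the backoff-polling send, rank 1 receives
 * with a plain MPI_Recv. */
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int payload = 42;
    if (rank == 0)
        MPI_Send_Nospin(&payload, 1, MPI_INT, 1, 0, MPI_COMM_WORLD);
    else if (rank == 1)
        MPI_Recv(&payload, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    MPI_Finalize();
    return 0;
}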
int MPI_Isend( void *buf, int count, MPI_Datatype datatype, int dest, int tag,
               MPI_Comm comm, MPI_Request *request)
{
    int ret, realsize;

    // actual send
    ret = PMPI_Isend(buf, count, datatype, dest, tag, comm, request);

    // get datatype size
    MPI_Type_size(datatype, &realsize);
    realsize *= count;

    // record send count
    my_send_count[dest]++;
    // record send size
    my_send_size[dest] += realsize;

    return ret;
}
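The counters my_send_count and my_send_size referenced above are defined elsewhere in the tool. A minimal sketch of how they might be declared and reported at finalize time; the array bound and the MPI_Finalize wrapper are illustrative assumptions, not taken from this source:

#include <mpi.h>
#include <stdio.h>

#define MAX_RANKS 1024               /* illustrative upper bound, not from the source */

long long my_send_count[MAX_RANKS]; /* number of MPI_Isend calls per destination */
long long my_send_size[MAX_RANKS];  /* bytes passed to MPI_Isend per destination */

int MPI_Finalize(void)
{
    int rank, size;
    PMPI_Comm_rank(MPI_COMM_WORLD, &rank);
    PMPI_Comm_size(MPI_COMM_WORLD, &size);
    for (int i = 0; i < size; i++)
        if (my_send_count[i] > 0)
            fprintf(stderr, "[%d] -> %d: %lld sends, %lld bytes\n",
                    rank, i, my_send_count[i], my_send_size[i]);
    return PMPI_Finalize();
}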
int MPI_Isend(MPI_CONST void *buf, int count, MPI_Datatype datatype, int dest,
              int tag, MPI_Comm comm, MPI_Request *req)
{
  int done;
  PNMPIMOD_Datatype_Parameters_t ref;
  char *b;
  int l, s;
  MPI_Datatype t;

  r_get(buf, count, datatype, &ref);

  printf("Sending to %i :\n", dest);
  do {
    PNMPIMOD_Datatype_getItem(&ref, &b, &t, &l, &s, &done)
#ifdef USE_FUNCTIONS
      ;
#endif
    printf("\t%i ", l);
    if (t == MPI_INT)
      printf("INT ");
    else if (t == MPI_SHORT)
      printf("SHORT ");
    else if (t == MPI_LONG)
      printf("LONG ");
    else if (t == MPI_CHAR)
      printf("CHAR ");
    else if (t == MPI_DOUBLE)
      printf("DOUBLE");
    else if (t == MPI_FLOAT)
      printf("FLOAT ");
    else
      printf("Other");
    printf(" of size %i at buf %16p / %li\n", s, b, ((long)b) - ((long)buf));
    fflush(stdout);
  } while (!done);

  r_del(&ref);

  return PMPI_Isend(buf, count, datatype, dest, tag, comm, req);
}
int main(int argc, char *argv[])
{
  int numproc, rank, len;
  char hostname[MPI_MAX_PROCESSOR_NAME];

  PMPI_Init(&argc, &argv);
  PMPI_Comm_size(MPI_COMM_WORLD, &numproc);
  PMPI_Comm_rank(MPI_COMM_WORLD, &rank);
  PMPI_Get_processor_name(hostname, &len);

  if (rank == 0) {
    int *freq, i, j;
    int N = numproc * numproc;
    freq = (int *)malloc(sizeof(int) * N);   /* one row of counters per rank */
    char *temp;
    temp = (char *)malloc(sizeof(char) * (numproc - 1));
    MPI_Status *stat, *stat1;
    stat  = (MPI_Status *)malloc(sizeof(MPI_Status) * (numproc - 1));
    stat1 = (MPI_Status *)malloc(sizeof(MPI_Status) * (numproc - 1));
    MPI_Request *req;
    req = (MPI_Request *)malloc(sizeof(MPI_Request) * (numproc - 1));

    for (i = 1; i < numproc; i++) {
      PMPI_Recv(temp + i - 1, 1, MPI_CHAR, i, 0, MPI_COMM_WORLD, stat + (i - 1));
    }
    for (i = 1; i < numproc; i++) {
      PMPI_Recv(freq + i * numproc, numproc, MPI_INT, i, 1, MPI_COMM_WORLD, stat1 + (i - 1));
    }
    printf("echo\n");
    /* MPI_Waitall((numproc-1), req, stat); */

    for (i = 1; i < numproc; i++) {
      printf("Rank %d ", i);
      for (j = 0; j < numproc; j++) {
        if (j != i) {
          int loc = i * numproc + j;
          printf("%d ", freq[loc]);
        }
      }
      printf("\n");
    }
  }
  else {
    int i, *nsend;
    char *rMsg, msg = 'x';
    rMsg  = (char *)malloc(sizeof(char));
    nsend = (int *)malloc(sizeof(int) * numproc);
    memset(nsend, 0, sizeof(int) * numproc);
    MPI_Request *req;
    req = (MPI_Request *)malloc(sizeof(MPI_Request) * (numproc + 1)); /* extra slot for the final Isend */
    MPI_Status *stat;
    stat = (MPI_Status *)malloc(sizeof(MPI_Status) * (numproc - 1));

    for (i = 0; i < numproc; i++) {
      if (i != rank) {
        *(nsend + i) += *(nsend + i) + 1;
        PMPI_Isend(&msg, 1, MPI_CHAR, i, 0, MPI_COMM_WORLD, &(req[i]));
      }
    }
    for (i = 1; i < numproc; i++) {
      if (i != rank)
        PMPI_Recv(rMsg, 1, MPI_CHAR, i, 0, MPI_COMM_WORLD, stat + i - 1);
    }
    MPI_Isend(nsend, numproc, MPI_INT, 0, 1, MPI_COMM_WORLD, req + numproc);
  }

  PMPI_Finalize();
  return 0;
}
/* Same behavior as PMPI_Irsend.c */
int PMPI_Issend(void *message, int count, MPI_Datatype datatype, int dest,
                int tag, MPI_Comm comm, MPI_Request *request)
{
  return PMPI_Isend(message, count, datatype, dest, tag, comm, request);
}
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank, peer, commsize;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &commsize);

    if (commsize % 2 != 0) {
        fprintf(stderr, "Use even number of processes.\n");
        exit(EXIT_FAILURE);
    }

    /* pair up adjacent ranks: (0,1), (2,3), ... */
    if (rank % 2)
        peer = (rank - 1) % commsize;
    else
        peer = (rank + 1) % commsize;

    char *mpi_inbuf, *mpi_outbuf, *pmpi_inbuf, *pmpi_outbuf;

    test_start("isend/irecv + test (2, vector[[int], count=2, blklen=3, stride=5])");
    init_buffers(20 * sizeof(int), &mpi_inbuf, &pmpi_inbuf, &mpi_outbuf, &pmpi_outbuf);

    MPI_Datatype vector_ddt;
    MPI_Type_vector(2, 3, 5, MPI_INT, &vector_ddt);
    MPI_Type_commit(&vector_ddt);

    MPI_Datatype pmpi_vector_ddt;
    PMPI_Type_vector(2, 3, 5, MPI_INT, &pmpi_vector_ddt);
    PMPI_Type_commit(&pmpi_vector_ddt);

    MPI_Request requests_mpi[2];
    MPI_Request requests_pmpi[2];
    MPI_Status statuses_mpi[2];
    MPI_Status statuses_pmpi[2];

    if (rank % 2 == 0) {
        MPI_Isend(mpi_inbuf, 2, vector_ddt, peer, 0, MPI_COMM_WORLD, &(requests_mpi[0]));
        MPI_Irecv(mpi_outbuf, 2, vector_ddt, peer, 0, MPI_COMM_WORLD, &(requests_mpi[1]));
        PMPI_Isend(pmpi_inbuf, 2, pmpi_vector_ddt, peer, 0, MPI_COMM_WORLD, &(requests_pmpi[0]));
        PMPI_Irecv(pmpi_outbuf, 2, pmpi_vector_ddt, peer, 0, MPI_COMM_WORLD, &(requests_pmpi[1]));
    }
    else {
        MPI_Irecv(mpi_outbuf, 2, vector_ddt, peer, 0, MPI_COMM_WORLD, &(requests_mpi[0]));
        MPI_Isend(mpi_inbuf, 2, vector_ddt, peer, 0, MPI_COMM_WORLD, &(requests_mpi[1]));
        PMPI_Irecv(pmpi_outbuf, 2, pmpi_vector_ddt, peer, 0, MPI_COMM_WORLD, &(requests_pmpi[0]));
        PMPI_Isend(pmpi_inbuf, 2, pmpi_vector_ddt, peer, 0, MPI_COMM_WORLD, &(requests_pmpi[1]));
    }

    int flag;
    flag = 0;
    while (flag == 0) MPI_Test(&(requests_mpi[0]), &flag, &(statuses_mpi[0]));
    flag = 0;
    while (flag == 0) MPI_Test(&(requests_mpi[1]), &flag, &(statuses_mpi[1]));
    flag = 0;
    while (flag == 0) MPI_Test(&(requests_pmpi[0]), &flag, &(statuses_pmpi[0]));
    flag = 0;
    while (flag == 0) MPI_Test(&(requests_pmpi[1]), &flag, &(statuses_pmpi[1]));

    int res = compare_buffers(20 * sizeof(int), &mpi_inbuf, &pmpi_inbuf, &mpi_outbuf, &pmpi_outbuf);
    free_buffers(&mpi_inbuf, &pmpi_inbuf, &mpi_outbuf, &pmpi_outbuf);
    test_result(res);

    MPI_Type_free(&vector_ddt);
    PMPI_Type_free(&pmpi_vector_ddt);

    MPI_Finalize();
    return 0;
}
int main(int argc, char **argv)
{
    /* Validate arguments */
    if (argc != 5) {
        fprintf(stderr, "Usage: %s [input file] [output file] [grid size] [iterations]\n", argv[0]);
        return 1;
    }

    double t1 = MPI_Wtime();

    /* Initialize MPI */
    int tasks, rank;
    MPI_Init(NULL, NULL);
    MPI_Comm_size(MPI_COMM_WORLD, &tasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    double t_init = MPI_Wtime();

    /* Declare global variables */
    double *initial = NULL;
    int N = atoi(argv[3]);
    int T = atoi(argv[4]);

    /*****************************************************************/
    /* READ DATA                                                     */
    /*****************************************************************/
    if (rank == 0) {
        if (DEBUG) {
            fprintf(stderr, "\n-------------------------------------------\n");
            fprintf(stderr, "Starting heat.c with %d processes\n", tasks);
        }

        /* Read data */
        FILE *in = fopen(argv[1], "r");
        initial = malloc(N * N * sizeof(double));
        if (DEBUG) {
            for (int i = 1; i <= N; i++) {
                for (int j = 1; j <= N; j++) {
                    double val = (double) (i * (N - i - 1) * j * (N - j - 1));
                    initial[(i - 1) * N + (j - 1)] = val;
                }
            }
        }
        else {
            for (int i = 0; i < N * N; i++) {
                int x, y;
                double z;
                fscanf(in, "%d %d %lf\n", &x, &y, &z);
                initial[(x - 1) * N + (y - 1)] = z;
            }
        }
        fclose(in);
    }
    double t_read = MPI_Wtime();

    /*****************************************************************/
    /* DISTRIBUTE DATA                                               */
    /*****************************************************************/

    /* Preliminaries */
    int rowsPerWorker = N / tasks;
    int extra = N % tasks;

    /* Determine neighboring workers */
    int pred = rank - 1;
    int succ = rank + 1;

    /* Determine how many values each worker will get */
    int offset = 0;
    int offsets[tasks];
    int items[tasks];
    for (int i = 0; i < tasks; i++) {
        items[i] = rowsPerWorker * N;
        if (i < extra)
            items[i] += N;
        offsets[i] = offset;
        offset += items[i];
    }

    /* Scatter the rows appropriately */
    int myrows = items[rank] / N;

    /* Allocate an extra row of padding on either end */
    double *current = calloc((items[rank] + 2 * N), sizeof(double));
    double *old = calloc((items[rank] + 2 * N), sizeof(double));

    if (DEBUG && rank == 0)
        fprintf(stderr, "scattering...\n");
    PMPI_Scatterv(initial, items, offsets, MPI_DOUBLE, current + N, items[rank],
                  MPI_DOUBLE, 0, MPI_COMM_WORLD);
    double t_scatter = MPI_Wtime();

    /*****************************************************************/
    /* CALCULATE                                                     */
    /*****************************************************************/
    MPI_Request req;
    double t_net = 0;

    for (int t = 0; t < T; t++) {
        if (DEBUG)
            fprintf(stderr, "Beginning iteration %d: rank %d\n", t, rank);

        /* Swap old and current so we can overwrite current */
        double *temp = current;
        current = old;
        old = temp;

        /* Hold onto some useful pointers into old */
        double *succrow = old + N * (myrows + 1);
        double *predrow = old;
        double *firstrow = old + N;
        double *lastrow = old + N * myrows;

        double t_temp = MPI_Wtime();

        /* Send last row to succ and receive it from pred if eligible */
        if (succ < tasks) {
            PMPI_Isend(lastrow, N, MPI_DOUBLE, succ, 0, MPI_COMM_WORLD, &req);
        }
        if (pred >= 0) {
            PMPI_Recv(predrow, N, MPI_DOUBLE, pred, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        /* Send first row to pred and receive it from succ if eligible */
        if (pred >= 0) {
            PMPI_Isend(firstrow, N, MPI_DOUBLE, pred, 0, MPI_COMM_WORLD, &req);
        }
        if (succ < tasks) {
            PMPI_Recv(succrow, N, MPI_DOUBLE, succ, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
        t_net += MPI_Wtime() - t_temp;

        /* Determine current from old, predrow, and succrow */
        for (int j = 1; j <= myrows; j++) {
            for (int k = 0; k < N; k++) {
                /* Determine adjacent cells */
                double left = 0, right = 0;
                if (k > 0)
                    left = old[j * N + k - 1];
                if (k < N - 1)
                    right = old[j * N + k + 1];
                double top = old[(j - 1) * N + k];
                double bottom = old[(j + 1) * N + k];
                double focus = old[j * N + k];

                /* Calculate the new cell value */
                current[j * N + k] = focus + .1 * (top + bottom - 2 * focus)
                                           + .1 * (left + right - 2 * focus);
            }
        }
    }
    free(old);
    double t_work = MPI_Wtime();

    /*****************************************************************/
    /* WRITE THE OUTPUT                                              */
    /*****************************************************************/
    PMPI_Gatherv(current + N, items[rank], MPI_DOUBLE, initial, items, offsets,
                 MPI_DOUBLE, 0, MPI_COMM_WORLD);
    double t_gather = MPI_Wtime();
    free(current);

    if (rank == 0 && !DEBUG) {
        FILE *out = fopen(argv[2], "w");
        for (int i = 0; i < N; i++) {
            for (int j = 0; j < N; j++) {
                fprintf(out, "%d %d %lf\n", i, j, initial[i * N + j]);
            }
        }
        fclose(out);
    }

    double t2 = MPI_Wtime();
    if (rank == 0) {
        fprintf(stderr, "-----------------------------------------\n");
        fprintf(stderr, "TIMING INFORMATION \n");
        fprintf(stderr, "-----------------------------------------\n");
        fprintf(stderr, "RANK INIT READ SCATTER WORK GATHER WRITE TOTAL NET\n");
    }
    MPI_Barrier(MPI_COMM_WORLD);
    fprintf(stderr, "%4.2d %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n",
            rank, t_init - t1, t_read - t_init, t_scatter - t_read, t_work - t_scatter,
            t_gather - t_work, t2 - t_gather, t2 - t1, t_net);

    free(initial);
    MPI_Finalize();
    return 0;
}
int MPI_Isend(void *buf, int count, MPI_Datatype datatype, int dst, int tag,
              MPI_Comm comm, MPI_Request *request)
{
    return PMPI_Isend(buf, count, datatype, dst, tag, comm, request);
}
int MPI_Win_free(MPI_Win * win)
{
    static const char FCNAME[] = "MTCORE_Win_free";
    int mpi_errno = MPI_SUCCESS;
    MTCORE_Win *uh_win;
    int user_rank, user_nprocs, user_local_rank, user_local_nprocs;
    int i, j;
    MPI_Request *reqs = NULL;
    MPI_Status *stats = NULL;

    MTCORE_DBG_PRINT_FCNAME();

    MTCORE_Fetch_uh_win_from_cache(*win, uh_win);

    if (uh_win == NULL) {
        /* normal window */
        return PMPI_Win_free(win);
    }

    /* mtcore window starts */

    PMPI_Comm_rank(uh_win->user_comm, &user_rank);
    PMPI_Comm_size(uh_win->user_comm, &user_nprocs);
    PMPI_Comm_rank(uh_win->local_user_comm, &user_local_rank);
    PMPI_Comm_size(uh_win->local_user_comm, &user_local_nprocs);

    /* First unlock global active window */
    if ((uh_win->info_args.epoch_type & MTCORE_EPOCH_FENCE) ||
        (uh_win->info_args.epoch_type & MTCORE_EPOCH_PSCW)) {

        MTCORE_DBG_PRINT("[%d]unlock_all(active_win 0x%x)\n", user_rank, uh_win->active_win);

        /* Since all processes must be in win_free, we do not need to worry
         * about the possibility of losing asynchronous progress. */
        mpi_errno = PMPI_Win_unlock_all(uh_win->active_win);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    if (user_local_rank == 0) {
        MTCORE_Func_start(MTCORE_FUNC_WIN_FREE, user_nprocs, user_local_nprocs);
    }

    /* Notify the handle of the target Helper win. Note that helpers cannot
     * fetch the corresponding window without the handles, so only the global
     * communicator can be used here. */
    if (user_local_rank == 0) {
        reqs = calloc(MTCORE_ENV.num_h, sizeof(MPI_Request));
        stats = calloc(MTCORE_ENV.num_h, sizeof(MPI_Status));

        for (j = 0; j < MTCORE_ENV.num_h; j++) {
            mpi_errno = PMPI_Isend(&uh_win->h_win_handles[j], 1, MPI_UNSIGNED_LONG,
                                   MTCORE_H_RANKS_IN_LOCAL[j], 0, MTCORE_COMM_LOCAL, &reqs[j]);
        }
        mpi_errno = PMPI_Waitall(MTCORE_ENV.num_h, reqs, stats);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    /* Free uh_win before local_uh_win, because all the incoming operations
     * should be done before freeing the shared buffers.
     *
     * We do not need an additional barrier in Manticore for waiting until all
     * operations complete, because Win_free already adds an internal barrier
     * that waits for operations on that window to complete. */
    if (uh_win->num_uh_wins > 0 && uh_win->uh_wins) {
        MTCORE_DBG_PRINT("\t free uh windows\n");
        for (i = 0; i < uh_win->num_uh_wins; i++) {
            if (uh_win->uh_wins[i]) {
                mpi_errno = PMPI_Win_free(&uh_win->uh_wins[i]);
                if (mpi_errno != MPI_SUCCESS)
                    goto fn_fail;
            }
        }
    }

    if (uh_win->active_win) {
        MTCORE_DBG_PRINT("\t free active window\n");
        mpi_errno = PMPI_Win_free(&uh_win->active_win);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    if (uh_win->local_uh_win) {
        MTCORE_DBG_PRINT("\t free shared window\n");
        mpi_errno = PMPI_Win_free(&uh_win->local_uh_win);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    if (uh_win->user_group != MPI_GROUP_NULL) {
        mpi_errno = PMPI_Group_free(&uh_win->user_group);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    if (uh_win->ur_h_comm && uh_win->ur_h_comm != MPI_COMM_NULL) {
        MTCORE_DBG_PRINT("\t free user root + helpers communicator\n");
        mpi_errno = PMPI_Comm_free(&uh_win->ur_h_comm);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    if (uh_win->local_uh_comm && uh_win->local_uh_comm != MTCORE_COMM_LOCAL) {
        MTCORE_DBG_PRINT("\t free shared communicator\n");
        mpi_errno = PMPI_Comm_free(&uh_win->local_uh_comm);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    if (uh_win->local_uh_group != MPI_GROUP_NULL) {
        mpi_errno = PMPI_Group_free(&uh_win->local_uh_group);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    if (uh_win->uh_comm != MPI_COMM_NULL && uh_win->uh_comm != MPI_COMM_WORLD) {
        MTCORE_DBG_PRINT("\t free uh communicator\n");
        mpi_errno = PMPI_Comm_free(&uh_win->uh_comm);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    if (uh_win->uh_group != MPI_GROUP_NULL) {
        mpi_errno = PMPI_Group_free(&uh_win->uh_group);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    if (uh_win->local_user_comm && uh_win->local_user_comm != MTCORE_COMM_USER_LOCAL) {
        MTCORE_DBG_PRINT("\t free local USER communicator\n");
        mpi_errno = PMPI_Comm_free(&uh_win->local_user_comm);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    if (uh_win->user_root_comm && uh_win->user_root_comm != MTCORE_COMM_UR_WORLD) {
        MTCORE_DBG_PRINT("\t free ur communicator\n");
        mpi_errno = PMPI_Comm_free(&uh_win->user_root_comm);
        if (mpi_errno != MPI_SUCCESS)
            goto fn_fail;
    }

    MTCORE_DBG_PRINT("\t free window cache\n");
    MTCORE_Remove_uh_win_from_cache(*win);

    MTCORE_DBG_PRINT("\t free user window\n");
    mpi_errno = PMPI_Win_free(win);
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;

    /* Free the PSCW arrays in case the user does not call complete/wait. */
    if (uh_win->start_ranks_in_win_group)
        free(uh_win->start_ranks_in_win_group);
    if (uh_win->post_ranks_in_win_group)
        free(uh_win->post_ranks_in_win_group);

    /* uh_win->user_comm is created by the user and will be freed by the user. */

#if defined(MTCORE_ENABLE_RUNTIME_LOAD_OPT)
    if (uh_win->h_ops_counts)
        free(uh_win->h_ops_counts);
    if (uh_win->h_bytes_counts)
        free(uh_win->h_bytes_counts);
#endif

    if (uh_win->targets) {
        for (i = 0; i < user_nprocs; i++) {
            if (uh_win->targets[i].base_h_offsets)
                free(uh_win->targets[i].base_h_offsets);
            if (uh_win->targets[i].h_ranks_in_uh)
                free(uh_win->targets[i].h_ranks_in_uh);
            if (uh_win->targets[i].segs)
                free(uh_win->targets[i].segs);
        }
        free(uh_win->targets);
    }
    if (uh_win->h_ranks_in_uh)
        free(uh_win->h_ranks_in_uh);
    if (uh_win->h_win_handles)
        free(uh_win->h_win_handles);
    if (uh_win->uh_wins)
        free(uh_win->uh_wins);
    free(uh_win);

    MTCORE_DBG_PRINT("Freed MTCORE window 0x%x\n", *win);

  fn_exit:
    if (reqs)
        free(reqs);
    if (stats)
        free(stats);
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}