/************************************************************************* * This function tests the repeated shmem_put **************************************************************************/ void SetUp(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) { int i, j, k, islocal, penum, gnvtxs, nvtxs, nlocal, firstvtx, lastvtx, nsend, nrecv, nnbrs, nadj; int npes=ctrl->npes, mype=ctrl->mype; idxtype *vtxdist, *xadj, *adjncy; idxtype *peind, *recvptr, *recvind, *sendptr, *sendind; idxtype *receive, *pemap, *imap, *lperm; idxtype *pexadj, *peadjncy, *peadjloc, *startsind; KeyValueType *recvrequests, *sendrequests, *adjpairs; IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->SetupTmr)); gnvtxs = graph->gnvtxs; nvtxs = graph->nvtxs; vtxdist = graph->vtxdist; xadj = graph->xadj; adjncy = graph->adjncy; firstvtx = vtxdist[mype]; lastvtx = vtxdist[mype+1]; pemap = wspace->pv1; idxset(npes, -1, pemap); lperm = graph->lperm = idxmalloc(nvtxs, "SetUp: graph->lperm"); for (i=0; i<nvtxs; i++) lperm[i] = i; /************************************************************* * Determine what you need to receive *************************************************************/ receive = wspace->indices; /* Use the large global received array for now */ adjpairs = wspace->pairs; for (nlocal = nadj = i = 0; i<nvtxs; i++) { islocal = 1; for (j=xadj[i]; j<xadj[i+1]; j++) { k = adjncy[j]; if (k >= firstvtx && k < lastvtx) { adjncy[j] = k-firstvtx; continue; /* local vertex */ } adjpairs[nadj].key = k; adjpairs[nadj++].val = j; islocal = 0; } if (islocal) { lperm[i] = lperm[nlocal]; lperm[nlocal++] = i; } } /* Take care the received part now */ ikeysort(nadj, adjpairs); adjpairs[nadj].key = gnvtxs+1; /* Boundary condition */ for (nrecv=i=0; i<nadj; i++) { adjncy[adjpairs[i].val] = nvtxs+nrecv; if (adjpairs[i].key != adjpairs[i+1].key) receive[nrecv++] = adjpairs[i].key; } /* Allocate space for the setup info attached to this level of the graph */ peind = graph->peind = idxmalloc(npes, "SetUp: peind"); recvptr = graph->recvptr = idxmalloc(npes+1, "SetUp: recvptr"); recvind = graph->recvind = idxmalloc(nrecv, "SetUp: recvind"); /* Take care of the received portion */ idxcopy(nrecv, receive, recvind); /* Copy the vertices to be received into recvind */ i = nnbrs = recvptr[0] = 0; for (penum=0; penum<npes; penum++) { for (j=i; j<nrecv; j++) { if (recvind[j] >= vtxdist[penum+1]) break; } if (j > i) { peind[nnbrs] = penum; recvptr[++nnbrs] = j; i = j; } } /************************************************************* * Determine what you need to send *************************************************************/ /* Tell the other processors what they need to send you */ recvrequests = wspace->pepairs1; sendrequests = wspace->pepairs2; for (i=0; i<npes; i++) recvrequests[i].key = 0; for (i=0; i<nnbrs; i++) { recvrequests[peind[i]].key = recvptr[i+1]-recvptr[i]; recvrequests[peind[i]].val = nvtxs+recvptr[i]; } MPI_Alltoall((void *)recvrequests, 2, IDX_DATATYPE, (void *)sendrequests, 2, IDX_DATATYPE, ctrl->comm); sendptr = graph->sendptr = idxmalloc(npes+1, "SetUp: sendptr"); startsind = wspace->pv2; for (j=i=0; i<npes; i++) { if (sendrequests[i].key > 0) { sendptr[j] = sendrequests[i].key; startsind[j] = sendrequests[i].val; j++; } } ASSERT(ctrl, nnbrs == j); MAKECSR(i, j, sendptr); nsend = sendptr[nnbrs]; sendind = graph->sendind = idxmalloc(nsend, "SetUp: sendind"); /* Issue the receives for sendind */ for (i=0; i<nnbrs; i++) { MPI_Irecv((void 
*)(sendind+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->rreq+i); } /* Issue the sends. My recvind[penum] becomes penum's sendind[mype] */ for (i=0; i<nnbrs; i++) { MPI_Isend((void *)(recvind+recvptr[i]), recvptr[i+1]-recvptr[i], IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i); } MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); /* Create the peadjncy data structure for sparse boundary exchanges */ pexadj = graph->pexadj = idxsmalloc(nvtxs+1, 0, "SetUp: pexadj"); peadjncy = graph->peadjncy = idxmalloc(nsend, "SetUp: peadjncy"); peadjloc = graph->peadjloc = idxmalloc(nsend, "SetUp: peadjloc"); for (i=0; i<nsend; i++) { ASSERTP(ctrl, sendind[i] >= firstvtx && sendind[i] < lastvtx, (ctrl, "%d %d %d\n", sendind[i], firstvtx, lastvtx)); pexadj[sendind[i]-firstvtx]++; } MAKECSR(i, nvtxs, pexadj); for (i=0; i<nnbrs; i++) { for (j=sendptr[i]; j<sendptr[i+1]; j++) { k = pexadj[sendind[j]-firstvtx]++; peadjncy[k] = i; /* peind[i] is the actual PE number */ peadjloc[k] = startsind[i]++; } } ASSERT(ctrl, pexadj[nvtxs] == nsend); for (i=nvtxs; i>0; i--) pexadj[i] = pexadj[i-1]; pexadj[0] = 0; graph->nnbrs = nnbrs; graph->nrecv = nrecv; graph->nsend = nsend; graph->nlocal = nlocal; /* Create the inverse map from ladjncy to adjncy */ imap = graph->imap = idxmalloc(nvtxs+nrecv, "SetUp: imap"); for (i=0; i<nvtxs; i++) imap[i] = firstvtx+i; for (i=0; i<nrecv; i++) imap[nvtxs+i] = recvind[i]; /* Check if wspace->nlarge is large enough for nrecv and nsend */ if (wspace->nlarge < nrecv+nsend) { free(wspace->indices); free(wspace->pairs); wspace->nlarge = nrecv+nsend; wspace->indices = idxmalloc(wspace->nlarge, "SetUp: wspace->indices"); wspace->pairs = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*wspace->nlarge, "SetUp: wspace->pairs"); } IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->SetupTmr)); #ifdef DEBUG_SETUPINFO rprintf(ctrl, "[%5d %5d] \tl:[%5d %5d] \ts:[%5d, %5d] \tr:[%5d, %5d]\n", GlobalSEMin(ctrl, nvtxs), GlobalSEMax(ctrl, nvtxs), GlobalSEMin(ctrl, nlocal), GlobalSEMax(ctrl, nlocal), GlobalSEMin(ctrl, nsend), GlobalSEMax(ctrl, nsend), GlobalSEMin(ctrl, nrecv), GlobalSEMax(ctrl, nrecv)); PrintSetUpInfo(ctrl, graph); #endif }
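/* The core of SetUp above is a two-step handshake: an MPI_Alltoall of per-peer
 * counts followed by nonblocking exchanges of the index lists, completed with a
 * single MPI_Waitall.  The helper below is a minimal, self-contained sketch of
 * that pattern only -- the name and arguments are illustrative, not ParMETIS API. */
#include <mpi.h>
#include <stdlib.h>

static void exchange_index_lists(MPI_Comm comm,
                                 int *nwant, int **want,          /* per peer: what I need */
                                 int *nmustsend, int **mustsend)  /* out, per peer: what peers need from me */
{
  int npes, pe, nreq = 0;
  MPI_Request *req;

  MPI_Comm_size(comm, &npes);
  req = (MPI_Request *) malloc(2 * npes * sizeof(MPI_Request));

  /* 1. Swap the counts (SetUp does this with recvrequests/sendrequests). */
  MPI_Alltoall(nwant, 1, MPI_INT, nmustsend, 1, MPI_INT, comm);

  /* 2. Exchange the index lists themselves and wait for all of them. */
  for (pe = 0; pe < npes; pe++) {
    if (nmustsend[pe] > 0) {
      mustsend[pe] = (int *) malloc(nmustsend[pe] * sizeof(int));
      MPI_Irecv(mustsend[pe], nmustsend[pe], MPI_INT, pe, 1, comm, &req[nreq++]);
    }
    if (nwant[pe] > 0)
      MPI_Isend(want[pe], nwant[pe], MPI_INT, pe, 1, comm, &req[nreq++]);
  }
  MPI_Waitall(nreq, req, MPI_STATUSES_IGNORE);
  free(req);
}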
void xchange_deri() { int cntr=0; #ifdef MPI MPI_Request request[8]; MPI_Status status[8]; int ix,mu, t, y, z, x; # if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT ) /* send the data to the neighbour on the left in time direction */ /* recieve the data from the neighbour on the right in time direction */ MPI_Isend(&df0[gI_m1_0_0_0][0].d1, 1, deri_time_slice_cont, g_nb_t_dn, 43, g_cart_grid, &request[cntr]); MPI_Irecv(&ddummy[gI_Lm1_0_0_0][0].d1, 1, deri_time_slice_cont, g_nb_t_up, 43, g_cart_grid, &request[cntr+1]); cntr=cntr+2; # endif # if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || defined PARALLELXY || defined PARALLELXYZ ) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Isend(&df0[gI_0_m1_0_0][0], 1, deri_x_slice_cont, g_nb_x_dn, 44, g_cart_grid, &request[cntr]); MPI_Irecv(&ddummy[gI_0_Lm1_0_0][0], 1, deri_x_slice_gath, g_nb_x_up, 44, g_cart_grid, &request[cntr+1]); cntr=cntr+2; # endif # if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ ) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Isend((void*)df0[gI_0_0_m1_0], 1, deri_y_slice_cont, g_nb_y_dn, 45, g_cart_grid, &request[cntr]); MPI_Irecv((void*)ddummy[gI_0_0_Lm1_0], 1, deri_y_slice_gath, g_nb_y_up, 45, g_cart_grid, &request[cntr+1]); cntr=cntr+2; # endif # if (defined PARALLELXYZT || defined PARALLELXYZ ) /* send the data to the neighbour on the left in z direction */ /* recieve the data from the neighbour on the right in z direction */ MPI_Isend((void*)df0[gI_0_0_0_m1], 1, deri_z_slice_cont, g_nb_z_dn, 46, g_cart_grid, &request[cntr]); MPI_Irecv((void*)ddummy[gI_0_0_0_Lm1], 1, deri_z_slice_gath, g_nb_z_up, 46, g_cart_grid, &request[cntr+1]); cntr=cntr+2; # endif MPI_Waitall(cntr, request, status); # if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT ) /* add ddummy to df0 */ for(x = 0; x < LX; x++) { for(y = 0; y < LY; y++) { for(z = 0; z < LZ; z++) { ix = g_ipt[T-1][x][y][z]; for(mu=0;mu<4;mu++){ df0[ix][mu].d1 += ddummy[ix][mu].d1; df0[ix][mu].d2 += ddummy[ix][mu].d2; df0[ix][mu].d3 += ddummy[ix][mu].d3; df0[ix][mu].d4 += ddummy[ix][mu].d4; df0[ix][mu].d5 += ddummy[ix][mu].d5; df0[ix][mu].d6 += ddummy[ix][mu].d6; df0[ix][mu].d7 += ddummy[ix][mu].d7; df0[ix][mu].d8 += ddummy[ix][mu].d8; } } } } /* send the data to the neighbour on the right is not needed*/ # endif # if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || defined PARALLELXY || defined PARALLELXYZ ) /* add ddummy to df0 */ for(t = 0; t < T; t++) { for(y = 0; y < LY; y++) { for(z = 0; z < LZ; z++) { ix = g_ipt[t][LX-1][y][z]; for(mu=0;mu<4;mu++){ df0[ix][mu].d1 += ddummy[ix][mu].d1; df0[ix][mu].d2 += ddummy[ix][mu].d2; df0[ix][mu].d3 += ddummy[ix][mu].d3; df0[ix][mu].d4 += ddummy[ix][mu].d4; df0[ix][mu].d5 += ddummy[ix][mu].d5; df0[ix][mu].d6 += ddummy[ix][mu].d6; df0[ix][mu].d7 += ddummy[ix][mu].d7; df0[ix][mu].d8 += ddummy[ix][mu].d8; } } } } /* send the data to the neighbour on the right is not needed*/ # endif # if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ ) /* add ddummy to df0 */ for(t = 0; t < T; t++) { for(x = 0; x < LX; x++) { for(z = 0; z < LZ; z++) { ix = g_ipt[t][x][LY-1][z]; for(mu=0;mu<4;mu++){ df0[ix][mu].d1 += 
ddummy[ix][mu].d1; df0[ix][mu].d2 += ddummy[ix][mu].d2; df0[ix][mu].d3 += ddummy[ix][mu].d3; df0[ix][mu].d4 += ddummy[ix][mu].d4; df0[ix][mu].d5 += ddummy[ix][mu].d5; df0[ix][mu].d6 += ddummy[ix][mu].d6; df0[ix][mu].d7 += ddummy[ix][mu].d7; df0[ix][mu].d8 += ddummy[ix][mu].d8; } } } } /* send the data to the neighbour on the right is not needed*/ # endif # if ( defined PARALLELXYZT || defined PARALLELXYZ ) /* add ddummy to df0 */ for(t = 0; t < T; t++) { for(x = 0; x < LX; x++) { for(y = 0; y < LY; y++) { ix = g_ipt[t][x][y][LZ-1]; for(mu=0;mu<4;mu++){ df0[ix][mu].d1 += ddummy[ix][mu].d1; df0[ix][mu].d2 += ddummy[ix][mu].d2; df0[ix][mu].d3 += ddummy[ix][mu].d3; df0[ix][mu].d4 += ddummy[ix][mu].d4; df0[ix][mu].d5 += ddummy[ix][mu].d5; df0[ix][mu].d6 += ddummy[ix][mu].d6; df0[ix][mu].d7 += ddummy[ix][mu].d7; df0[ix][mu].d8 += ddummy[ix][mu].d8; } } } } /* send the data to the neighbour on the right is not needed*/ # endif return; # endif /* MPI */ }
/* ADIOI_Exchange_file_views - Sends all the aggregators the file * views and file view states of the clients. It fills in the * client_file_view_state_arr for the aggregators and the * my_mem_view_state for the client. It also initializes the * agg_file_view_state for all clients, which is the view for each * aggregator of a client's filetype. */ void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type, ADIO_File fd, int count, MPI_Datatype datatype, ADIO_Offset off, view_state * my_mem_view_state_arr, view_state * agg_file_view_state_arr, view_state * client_file_view_state_arr) { /* Convert my own fileview to an ADIOI_Flattened type and a * disp. MPI_Alltoall the count of ADIOI_Flatlist nodes. * MPI_Isend/Irecv the block_lens, indices of ADIOI_Flatlist node * to/from each of the aggregators with the rest of the file view * state. */ int i = -1, j = -1; amount_and_extra_data_t *send_count_arr = NULL; amount_and_extra_data_t *recv_count_arr = NULL; int send_req_arr_sz = 0; int recv_req_arr_sz = 0; MPI_Request *send_req_arr = NULL, *recv_req_arr = NULL; MPI_Status *statuses = NULL; ADIO_Offset disp_off_sz_ext_typesz[6]; MPI_Aint memtype_extent, filetype_extent; int ret = -1; /* parameters for datatypes */ ADIOI_Flatlist_node *flat_mem_p = NULL, *flat_file_p = NULL; MPI_Count memtype_sz = -1; int memtype_is_contig = -1; ADIO_Offset filetype_sz = -1; #ifdef AGGREGATION_PROFILE MPE_Log_event(5014, 0, NULL); #endif /* The memtype will be freed after the call. The filetype will be * freed in the close and should have been flattened in the file * view. */ MPI_Type_size_x(datatype, &memtype_sz); MPI_Type_extent(datatype, &memtype_extent); if (memtype_sz == memtype_extent) { memtype_is_contig = 1; flat_mem_p = ADIOI_Flatten_and_find(datatype); flat_mem_p->blocklens[0] = memtype_sz * count; } else { flat_mem_p = ADIOI_Flatten_and_find(datatype); } MPI_Type_extent(fd->filetype, &filetype_extent); MPI_Type_size_x(fd->filetype, &filetype_sz); flat_file_p = ADIOI_Flatten_and_find(fd->filetype); if (filetype_extent == filetype_sz) { flat_file_p->blocklens[0] = memtype_sz * count; filetype_extent = memtype_sz * count; filetype_sz = filetype_extent; } disp_off_sz_ext_typesz[0] = fd->fp_ind; disp_off_sz_ext_typesz[1] = fd->disp; disp_off_sz_ext_typesz[2] = off; disp_off_sz_ext_typesz[3] = memtype_sz * count; disp_off_sz_ext_typesz[4] = (ADIO_Offset) filetype_extent; disp_off_sz_ext_typesz[5] = (ADIO_Offset) filetype_sz; if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t)); send_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t)); } else { send_count_arr = ADIOI_Calloc(fd->hints->cb_nodes, sizeof(amount_and_extra_data_t)); /* only aggregators receive data */ if (fd->is_agg) { recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t)); recv_req_arr = ADIOI_Malloc(nprocs * sizeof(MPI_Request)); for (i = 0; i < nprocs; i++) MPI_Irecv(&recv_count_arr[i], sizeof(amount_and_extra_data_t), MPI_BYTE, i, COUNT_EXCH, fd->comm, &recv_req_arr[i]); } /* only send data to aggregators */ send_req_arr = ADIOI_Calloc(fd->hints->cb_nodes, sizeof(MPI_Request)); for (i = 0; i < fd->hints->cb_nodes; i++) { send_count_arr[i].count = flat_file_p->count; send_count_arr[i].fp_ind = disp_off_sz_ext_typesz[0]; send_count_arr[i].disp = disp_off_sz_ext_typesz[1]; send_count_arr[i].byte_off = disp_off_sz_ext_typesz[2]; send_count_arr[i].sz = disp_off_sz_ext_typesz[3]; send_count_arr[i].ext = disp_off_sz_ext_typesz[4]; 
send_count_arr[i].type_sz = disp_off_sz_ext_typesz[5]; MPI_Isend(&send_count_arr[i], sizeof(amount_and_extra_data_t), MPI_BYTE, fd->hints->ranklist[i], COUNT_EXCH, fd->comm, &send_req_arr[i]); } } /* Every client has to build mem and file view_states for each aggregator. * We initialize their values here. and we also initialize * send_count_arr */ if (memtype_is_contig) { /* if memory is contigous, we now replace memtype_sz and * memtype_extent with the full access size */ memtype_sz *= count; memtype_extent = memtype_sz; } for (i = 0; i < fd->hints->cb_nodes; i++) { int tmp_agg_idx = fd->hints->ranklist[i]; memset(&(my_mem_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state)); my_mem_view_state_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3]; my_mem_view_state_arr[tmp_agg_idx].ext = (ADIO_Offset) memtype_extent; my_mem_view_state_arr[tmp_agg_idx].type_sz = (ADIO_Offset) memtype_sz; my_mem_view_state_arr[tmp_agg_idx].flat_type_p = flat_mem_p; ADIOI_init_view_state(file_ptr_type, 1, &(my_mem_view_state_arr[tmp_agg_idx]), TEMP_OFF); ADIOI_init_view_state(file_ptr_type, 1, &(my_mem_view_state_arr[tmp_agg_idx]), REAL_OFF); memset(&(agg_file_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state)); agg_file_view_state_arr[tmp_agg_idx].fp_ind = disp_off_sz_ext_typesz[0]; agg_file_view_state_arr[tmp_agg_idx].disp = disp_off_sz_ext_typesz[1]; agg_file_view_state_arr[tmp_agg_idx].byte_off = disp_off_sz_ext_typesz[2]; agg_file_view_state_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3]; agg_file_view_state_arr[tmp_agg_idx].ext = disp_off_sz_ext_typesz[4]; agg_file_view_state_arr[tmp_agg_idx].type_sz = disp_off_sz_ext_typesz[5]; agg_file_view_state_arr[tmp_agg_idx].flat_type_p = flat_file_p; ADIOI_init_view_state(file_ptr_type, 1, &(agg_file_view_state_arr[tmp_agg_idx]), TEMP_OFF); ADIOI_init_view_state(file_ptr_type, 1, &(agg_file_view_state_arr[tmp_agg_idx]), REAL_OFF); if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { send_count_arr[tmp_agg_idx].count = flat_file_p->count; send_count_arr[tmp_agg_idx].fp_ind = disp_off_sz_ext_typesz[0]; send_count_arr[tmp_agg_idx].disp = disp_off_sz_ext_typesz[1]; send_count_arr[tmp_agg_idx].byte_off = disp_off_sz_ext_typesz[2]; send_count_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3]; send_count_arr[tmp_agg_idx].ext = disp_off_sz_ext_typesz[4]; send_count_arr[tmp_agg_idx].type_sz = disp_off_sz_ext_typesz[5]; } } #ifdef DEBUG2 fprintf(stderr, "my own flattened memtype: "); ADIOI_Print_flatlist_node(flat_mem_p); fprintf(stderr, "my own flattened filetype: "); ADIOI_Print_flatlist_node(flat_file_p); #endif if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { ret = MPI_Alltoall(send_count_arr, sizeof(amount_and_extra_data_t), MPI_BYTE, recv_count_arr, sizeof(amount_and_extra_data_t), MPI_BYTE, fd->comm); if (ret != MPI_SUCCESS) { fprintf(stderr, "ADIOI_Exchange_file_views: MPI_Alltoall failed " "with error %d", ret); return; } } else { #ifdef MPI_STATUSES_IGNORE statuses = MPI_STATUSES_IGNORE; #else statuses = (MPI_Status *) ADIOI_Malloc(1 + nprocs * sizeof(MPI_Status)); #endif if (fd->is_agg) { MPI_Waitall(nprocs, recv_req_arr, statuses); ADIOI_Free(recv_req_arr); } MPI_Waitall(fd->hints->cb_nodes, send_req_arr, statuses); #ifndef MPI_STATUSES_IGNORE ADIOI_Free(statuses); #endif ADIOI_Free(send_req_arr); } #ifdef DEBUG2 if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { fprintf(stderr, "send_count_arr:"); for (i = 0; i < nprocs; i++) { fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count); } fprintf(stderr, "\n"); fprintf(stderr, "recv_count_arr:"); for (i = 0; i < 
nprocs; i++) { fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count); } fprintf(stderr, "\n"); } else { fprintf(stderr, "send_count_arr:"); for (i = 0; i < fd->hints->cb_nodes; i++) { fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count); } fprintf(stderr, "\n"); if (fd->is_agg) { fprintf(stderr, "recv_count_arr:"); for (i = 0; i < nprocs; i++) { fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count); } fprintf(stderr, "\n"); } } #endif if (fd->hints->cb_alltoall == ADIOI_HINT_DISABLE) { for (i = 0; i < fd->hints->cb_nodes; i++) if (send_count_arr[i].count > 0) send_req_arr_sz++; } /* Figure out how many counts to send/recv */ for (i = 0; i < nprocs; i++) { if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { if (send_count_arr[i].count > 0) send_req_arr_sz++; } /* Only aggregators should recv */ if (fd->is_agg) { if (recv_count_arr[i].count > 0) { if ((client_file_view_state_arr[i].flat_type_p = (ADIOI_Flatlist_node *) ADIOI_Malloc(sizeof(ADIOI_Flatlist_node))) == NULL) { fprintf(stderr, "ADIOI_Exchange_file_views: malloc " "flat_type_p failed\n"); } client_file_view_state_arr[i].flat_type_p->count = recv_count_arr[i].count; client_file_view_state_arr[i].flat_type_p->indices = (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, sizeof(ADIO_Offset)); client_file_view_state_arr[i].flat_type_p->blocklens = (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, sizeof(ADIO_Offset)); /* Copy the extra data out of the stuff we Alltoall'd */ memcpy(&client_file_view_state_arr[i].fp_ind, &recv_count_arr[i].fp_ind, 6 * sizeof(ADIO_Offset)); recv_req_arr_sz++; } } } /* Since ADIOI_Calloc may do other things we add the +1 * to avoid a 0-size malloc */ send_req_arr = (MPI_Request *) ADIOI_Calloc(2 * (send_req_arr_sz) + 1, sizeof(MPI_Request)); j = 0; if (recv_req_arr_sz > 0) { assert(fd->is_agg); recv_req_arr = (MPI_Request *) ADIOI_Calloc(2 * (recv_req_arr_sz), sizeof(MPI_Request)); for (i = 0; i < nprocs; i++) { if (recv_count_arr[i].count > 0) { MPI_Irecv(client_file_view_state_arr[i].flat_type_p->indices, recv_count_arr[i].count, ADIO_OFFSET, i, INDICES, fd->comm, &recv_req_arr[j]); j++; MPI_Irecv(client_file_view_state_arr[i].flat_type_p->blocklens, recv_count_arr[i].count, ADIO_OFFSET, i, BLOCK_LENS, fd->comm, &recv_req_arr[j]); j++; } } } if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) { j = 0; for (i = 0; i < nprocs; i++) { if (send_count_arr[i].count > 0) { MPI_Isend(flat_file_p->indices, send_count_arr[i].count, ADIO_OFFSET, i, INDICES, fd->comm, &send_req_arr[j]); j++; MPI_Isend(flat_file_p->blocklens, send_count_arr[i].count, ADIO_OFFSET, i, BLOCK_LENS, fd->comm, &send_req_arr[j]); j++; } } } else { j = 0; for (i = 0; i < fd->hints->cb_nodes; i++) { if (send_count_arr[i].count > 0) { MPI_Isend(flat_file_p->indices, send_count_arr[i].count, ADIO_OFFSET, fd->hints->ranklist[i], INDICES, fd->comm, &send_req_arr[j]); j++; MPI_Isend(flat_file_p->blocklens, send_count_arr[i].count, ADIO_OFFSET, fd->hints->ranklist[i], BLOCK_LENS, fd->comm, &send_req_arr[j]); j++; } } } /* Since ADIOI_Malloc may do other things we add the +1 * to avoid a 0-size malloc */ #ifdef MPI_STATUSES_IGNORE statuses = MPI_STATUSES_IGNORE; #else statuses = (MPI_Status *) ADIOI_Malloc(1 + 2 * MPL_MAX(send_req_arr_sz, recv_req_arr_sz) * sizeof(MPI_Status)); #endif if (send_req_arr_sz > 0) { MPI_Waitall(2 * send_req_arr_sz, send_req_arr, statuses); ADIOI_Free(send_count_arr); ADIOI_Free(send_req_arr); } if (recv_req_arr_sz > 0) { MPI_Waitall(2 * recv_req_arr_sz, recv_req_arr, statuses); ADIOI_Free(recv_count_arr); 
ADIOI_Free(recv_req_arr);
    }
#ifndef MPI_STATUSES_IGNORE
    ADIOI_Free(statuses);
#endif

    if (fd->is_agg == 1) {
        ADIOI_init_view_state(file_ptr_type, nprocs, client_file_view_state_arr, TEMP_OFF);
        ADIOI_init_view_state(file_ptr_type, nprocs, client_file_view_state_arr, REAL_OFF);
    }
#ifdef DEBUG
    if (fd->is_agg == 1) {
        ADIOI_Flatlist_node *fr_node_p;
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "client_file_view_state_arr[%d]=(fp_ind=%Ld,"
                    "disp=%Ld,byte_off=%Ld,sz=%Ld,ext=%Ld\n", i,
                    client_file_view_state_arr[i].fp_ind,
                    client_file_view_state_arr[i].disp,
                    client_file_view_state_arr[i].byte_off,
                    client_file_view_state_arr[i].sz,
                    client_file_view_state_arr[i].ext);
        }
        fr_node_p = ADIOI_Flatten_and_find(fd->file_realm_types[fd->my_cb_nodes_index]);
        assert(fr_node_p != NULL);
        fprintf(stderr, "my file realm (idx=%d,st_off=%Ld) ", fd->my_cb_nodes_index,
                fd->file_realm_st_offs[fd->my_cb_nodes_index]);
        ADIOI_Print_flatlist_node(fr_node_p);
    }
#endif
#ifdef DEBUG2
    if (fd->is_agg == 1) {
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "client_file_view_state_arr[%d]: ", i);
            ADIOI_Print_flatlist_node(client_file_view_state_arr[i].flat_type_p);
        }
    }
#endif
#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5015, 0, NULL);
#endif
}
int main(int argc, char **argv) { int myRank, numPes; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numPes); MPI_Comm_rank(MPI_COMM_WORLD, &myRank); MPI_Request sreq[2], rreq[2]; int blockDimX, arrayDimX, arrayDimY; if (argc != 2 && argc != 3) { printf("%s [array_size] \n", argv[0]); printf("%s [array_size_X] [array_size_Y] \n", argv[0]); MPI_Abort(MPI_COMM_WORLD, -1); } if(argc == 2) { arrayDimY = arrayDimX = atoi(argv[1]); } else { arrayDimX = atoi(argv[1]); arrayDimY = atoi(argv[2]); } if (arrayDimX % numPes != 0) { printf("array_size_X % numPes != 0!\n"); MPI_Abort(MPI_COMM_WORLD, -1); } blockDimX = arrayDimX / numPes; int iterations = 0, i, j; double error = 1.0, max_error = 0.0; if(myRank == 0) { printf("Running Jacobi on %d processors\n", numPes); printf("Array Dimensions: %d %d\n", arrayDimX, arrayDimY); printf("Block Dimensions: %d\n", blockDimX); } double **temperature; double **new_temperature; /* allocate two dimensional arrays */ temperature = new double*[blockDimX+2]; new_temperature = new double*[blockDimX+2]; for (i=0; i<blockDimX+2; i++) { temperature[i] = new double[arrayDimY]; new_temperature[i] = new double[arrayDimY]; } for(i=0; i<blockDimX+2; i++) { for(j=0; j<arrayDimY; j++) { temperature[i][j] = 0.5; new_temperature[i][j] = 0.5; } } // boundary conditions if(myRank < numPes/2) { for(i=1; i<=blockDimX; i++) temperature[i][0] = 1.0; } if(myRank == numPes-1) { for(j=arrayDimY/2; j<arrayDimY; j++) temperature[blockDimX][j] = 0.0; } MPI_Barrier(MPI_COMM_WORLD); MPI_Pcontrol(1); startTime = MPI_Wtime(); while(/*error > 0.001 &&*/ iterations < MAX_ITER) { iterations++; /* Receive my bottom and top edge */ MPI_Irecv(&temperature[blockDimX+1][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank+1), BOTTOM, MPI_COMM_WORLD, &rreq[BOTTOM-1]); MPI_Irecv(&temperature[0][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank-1), TOP, MPI_COMM_WORLD, &rreq[TOP-1]); /* Send my top and bottom edge */ MPI_Isend(&temperature[1][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank-1), BOTTOM, MPI_COMM_WORLD, &sreq[BOTTOM-1]); MPI_Isend(&temperature[blockDimX][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank+1), TOP, MPI_COMM_WORLD, &sreq[TOP-1]); MPI_Waitall(2, rreq, MPI_STATUSES_IGNORE); MPI_Waitall(2, sreq, MPI_STATUSES_IGNORE); for(i=1; i<blockDimX+1; i++) { for(j=0; j<arrayDimY; j++) { /* update my value based on the surrounding values */ new_temperature[i][j] = (temperature[i-1][j]+temperature[i+1][j]+temperature[i][wrap_y(j-1)]+temperature[i][wrap_y(j+1)]+temperature[i][j]) * 0.2; } } max_error = error = 0.0; for(i=1; i<blockDimX+1; i++) { for(j=0; j<arrayDimY; j++) { error = fabs(new_temperature[i][j] - temperature[i][j]); if(error > max_error) max_error = error; } } double **tmp; tmp = temperature; temperature = new_temperature; new_temperature = tmp; // boundary conditions if(myRank < numPes/2) { for(i=1; i<=blockDimX; i++) temperature[i][0] = 1.0; } if(myRank == numPes-1) { for(j=arrayDimY/2; j<arrayDimY; j++) temperature[blockDimX][j] = 0.0; } //if(myRank == 0) printf("Iteration %d %f %f %f\n", iterations, max_error, temperature[1][0], temperature[1][1]); MPI_Allreduce(&max_error, &error, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); } /* end of while loop */ MPI_Barrier(MPI_COMM_WORLD); MPI_Pcontrol(0); if(myRank == 0) { endTime = MPI_Wtime(); printf("Completed %d iterations\n", iterations); printf("Time elapsed: %f\n", endTime - startTime); } MPI_Finalize(); return 0; } /* end function main */
/* Fortran-callable wrapper around MPI_Waitall: the Fortran INTEGER arrays are
 * passed straight through, which assumes an MPI implementation where
 * MPI_Request is an int handle and the Fortran status layout matches
 * MPI_Status (MPICH-style bindings). */
void mpi_waitall(int *count, int *request, int *status, int *ierr)
{
    *ierr = MPI_Waitall(*count, request, (MPI_Status *) status);
    return;
}
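/* A hedged sketch of a variant of the wrapper above: it converts the request
 * handles explicitly with MPI_Request_f2c/c2f (useful when MPI_Request is not
 * an int) and honours the Fortran MPI_STATUSES_IGNORE sentinel.  The status
 * array, when not ignored, is still passed through under the same layout
 * assumption as the original wrapper. */
#include <mpi.h>
#include <stdlib.h>

void mpi_waitall_f2c(MPI_Fint *count, MPI_Fint *request, MPI_Fint *status, MPI_Fint *ierr)
{
    int i, n = (int) *count;
    MPI_Request *c_req = (MPI_Request *) malloc(n * sizeof(MPI_Request));
    MPI_Status *c_status = (status == MPI_F_STATUSES_IGNORE)
                           ? MPI_STATUSES_IGNORE
                           : (MPI_Status *) status;   /* layout assumption, as above */

    for (i = 0; i < n; i++)
        c_req[i] = MPI_Request_f2c(request[i]);

    *ierr = (MPI_Fint) MPI_Waitall(n, c_req, c_status);

    /* completed requests become MPI_REQUEST_NULL; hand the handles back to Fortran */
    for (i = 0; i < n; i++)
        request[i] = MPI_Request_c2f(c_req[i]);

    free(c_req);
}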
int main( int argc, char *argv[] ) { int errs = 0; MPI_Comm comm; MPI_Request r[2]; MPI_Status s[2]; int errval, errclass; int b1[20], b2[20], rank, size, src, dest, i; MTest_Init( &argc, &argv ); /* Create some receive requests. tags 0-9 will succeed, tags 10-19 will be used for ERR_TRUNCATE (fewer than 20 messages will be used) */ comm = MPI_COMM_WORLD; MPI_Comm_rank( comm, &rank ); MPI_Comm_size( comm, &size ); src = 1; dest = 0; if (rank == dest) { MPI_Errhandler_set( comm, MPI_ERRORS_RETURN ); errval = MPI_Irecv( b1, 10, MPI_INT, src, 0, comm, &r[0] ); if (errval) { errs++; MTestPrintError( errval ); printf( "Error returned from Irecv\n" ); } errval = MPI_Irecv( b2, 10, MPI_INT, src, 10, comm, &r[1] ); if (errval) { errs++; MTestPrintError( errval ); printf( "Error returned from Irecv\n" ); } errval = MPI_Barrier(comm); if (errval) { errs++; MTestPrintError( errval ); printf( "Error returned from Barrier\n" ); } for (i=0; i<2; i++) { s[i].MPI_ERROR = -1; } errval = MPI_Waitall( 2, r, s ); MPI_Error_class( errval, &errclass ); if (errclass != MPI_ERR_IN_STATUS) { errs++; printf( "Did not get ERR_IN_STATUS in Waitall\n" ); } else { /* Check for success */ /* We allow ERR_PENDING (neither completed nor in error) in case the MPI implementation exits the Waitall when an error is detected. Thanks to Jim Hoekstra of Iowa State University and Kim McMahon for finding this bug in the test. */ for (i=0; i<2; i++) { if (s[i].MPI_TAG < 10 && (s[i].MPI_ERROR != MPI_SUCCESS && s[i].MPI_ERROR != MPI_ERR_PENDING)) { char msg[MPI_MAX_ERROR_STRING]; int msglen = MPI_MAX_ERROR_STRING; errs++; printf( "correct msg had error code %d\n", s[i].MPI_ERROR ); MPI_Error_string( s[i].MPI_ERROR, msg, &msglen ); printf( "Error message was %s\n", msg ); } else if (s[i].MPI_TAG >= 10 && s[i].MPI_ERROR == MPI_SUCCESS) { errs++; printf( "truncated msg had MPI_SUCCESS\n" ); } } } } else if (rank == src) { /* Send messages, then barrier so that the wait does not start until we are sure that the sends have begun */ MPI_Send( b1, 10, MPI_INT, dest, 0, comm ); MPI_Send( b2, 11, MPI_INT, dest, 10, comm ); MPI_Barrier(comm); } else { MPI_Barrier(comm); } MTest_Finalize( errs ); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int errs = 0; MPI_Status status, *status_array = 0; int count = 0, flag, idx, rc, errlen, *indices=0, outcnt; MPI_Request *reqs = 0; char errmsg[MPI_MAX_ERROR_STRING]; MTest_Init(&argc, &argv); MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_RETURN ); rc = MPI_Testall( count, reqs, &flag, status_array ); if (rc != MPI_SUCCESS) { MPI_Error_string( rc, errmsg, &errlen ); printf( "MPI_Testall returned failure: %s\n", errmsg ); errs ++; } else if (!flag) { printf( "MPI_Testall( 0, ... ) did not return a true flag\n") ; errs++; } rc = MPI_Waitall( count, reqs, status_array ); if (rc != MPI_SUCCESS) { MPI_Error_string( rc, errmsg, &errlen ); printf( "MPI_Waitall returned failure: %s\n", errmsg ); errs ++; } rc = MPI_Testany( count, reqs, &idx, &flag, &status ); if (rc != MPI_SUCCESS) { MPI_Error_string( rc, errmsg, &errlen ); printf( "MPI_Testany returned failure: %s\n", errmsg ); errs ++; } else if (!flag) { printf( "MPI_Testany( 0, ... ) did not return a true flag\n") ; errs++; } rc = MPI_Waitany( count, reqs, &idx, &status ); if (rc != MPI_SUCCESS) { MPI_Error_string( rc, errmsg, &errlen ); printf( "MPI_Waitany returned failure: %s\n", errmsg ); errs ++; } rc = MPI_Testsome( count, reqs, &outcnt, indices, status_array ); if (rc != MPI_SUCCESS) { MPI_Error_string( rc, errmsg, &errlen ); printf( "MPI_Testsome returned failure: %s\n", errmsg ); errs ++; } rc = MPI_Waitsome( count, reqs, &outcnt, indices, status_array ); if (rc != MPI_SUCCESS) { MPI_Error_string( rc, errmsg, &errlen ); printf( "MPI_Waitsome returned failure: %s\n", errmsg ); errs ++; } MTest_Finalize( errs ); MPI_Finalize(); return 0; }
void boundary_conditions(double ****f) { int i, j, k, l, req_numS=0, req_numR=0; int r1,r2,z1,z2; int flag,cnt,z[7],ztemp,tag=10; double vrho,vphi,vth,An; char msg_err[100]; //for putlog+mpi_error int reslen; z[0]=1; z[1]=z[2]=z[3]=-1; // влияет на вид гран.условий (-1:жесткие, 1:свободные) z[4]=-1; z[5]=1; // rather for Anorm&Atau not Ar,Aphi,Az /*============================ divertor =================================*/ if(t_cur>100) // divertors are off till t sec if(n[2]==0) for(i=0;i<m1;i++) for(j=ghost;j<=ghost;j++) for(k=0;k<m3;k++) { vrho = f[3][i][j][k]*costh[i][k]+f[1][i][j][k]*sinth[i][k]; vth = -f[3][i][j][k]*sinth[i][k]+f[1][i][j][k]*costh[i][k]; vphi = sqrt(pow(f[2][i][j][k],2)+pow(vth,2)); //=sqrt(vfi^2+vth^2) f[1][i][j][k] = vrho*sinth[i][k]+vphi*costh[i][k]*sin(chi[i][k]); f[2][i][j][k] = vphi*cos(chi[i][k]); f[3][i][j][k] = vrho*costh[i][k]-vphi*sinth[i][k]*sin(chi[i][k]); } /*-------------------------------- exchanging of ghosts -------------------------------------*/ // exchanging in phi-direction - periodical directions first if(pr_neighbour[2]>-1) if(pr_neighbour[2]==rank) CopyGridToBuffer(f,nut,buf_recv[2],0,n2,0,m1-1,mm2-1,m3-1); else { CopyGridToBuffer(f,nut,buf_send[2],0,ghost,0,m1-1,2*ghost-1,m3-1); MPI_Isend(buf_send[2],buf_size[1],MPI_DOUBLE,pr_neighbour[2],tag+2,MPI_COMM_WORLD,&SendRequest[req_numS++]); MPI_Irecv(buf_recv[2],buf_size[1],MPI_DOUBLE,pr_neighbour[2],tag+3,MPI_COMM_WORLD,&RecvRequest[req_numR++]); } if(pr_neighbour[3]>-1) if(pr_neighbour[3]==rank) CopyGridToBuffer(f,nut,buf_recv[3],0,ghost,0,m1-1,2*ghost-1,m3-1); else { CopyGridToBuffer(f,nut,buf_send[3],0,n2,0,m1-1,mm2-1,m3-1); MPI_Isend(buf_send[3],buf_size[1],MPI_DOUBLE,pr_neighbour[3],tag+3,MPI_COMM_WORLD,&SendRequest[req_numS++]); MPI_Irecv(buf_recv[3],buf_size[1],MPI_DOUBLE,pr_neighbour[3],tag+2,MPI_COMM_WORLD,&RecvRequest[req_numR++]); } MPI_Waitall(req_numR,RecvRequest,statuses); if(statuses[0].MPI_ERROR) {putlog("bc:error during transfer=",numlog++); MPI_Error_string(statuses[0].MPI_ERROR,msg_err,&reslen); msg_err[reslen++] = ','; msg_err[reslen]= 0; putlog(msg_err,numlog++); } else numlog++; if(pr_neighbour[2]>-1) CopyBufferToGrid(f,nut,buf_recv[2],0,0,0,m1-1,ghost-1,m3-1); if(pr_neighbour[3]>-1) CopyBufferToGrid(f,nut,buf_recv[3],0,mm2,0,m1-1,m2-1,m3-1); // exchanging in r-direction if(pr_neighbour[0]>-1) if(pr_neighbour[0]==rank) CopyGridToBuffer(f,nut,buf_recv[0],n1,0,0,mm1-1,m2-1,m3-1); else { CopyGridToBuffer(f,nut,buf_send[0],ghost,0,0,2*ghost-1,m2-1,m3-1); MPI_Isend(buf_send[0],buf_size[0],MPI_DOUBLE,pr_neighbour[0],tag,MPI_COMM_WORLD,&SendRequest[req_numS++]); MPI_Irecv(buf_recv[0],buf_size[0],MPI_DOUBLE,pr_neighbour[0],tag+1,MPI_COMM_WORLD,&RecvRequest[req_numR++]); } if(pr_neighbour[1]>-1) if(pr_neighbour[1]==rank) CopyGridToBuffer(f,nut,buf_recv[1],ghost,0,0,2*ghost-1,m2-1,m3-1); else { CopyGridToBuffer(f,nut,buf_send[1],n1,0,0,mm1-1,m2-1,m3-1); MPI_Isend(buf_send[1],buf_size[0],MPI_DOUBLE,pr_neighbour[1],tag+1,MPI_COMM_WORLD,&SendRequest[req_numS++]); MPI_Irecv(buf_recv[1],buf_size[0],MPI_DOUBLE,pr_neighbour[1],tag,MPI_COMM_WORLD,&RecvRequest[req_numR++]); } MPI_Waitall(req_numR,RecvRequest,statuses); if(statuses[0].MPI_ERROR) {putlog("bc:error during transfer=",numlog++); MPI_Error_string(statuses[0].MPI_ERROR,msg_err,&reslen); msg_err[reslen++] = ','; msg_err[reslen]= 0; putlog(msg_err,numlog++); } else numlog++; if(pr_neighbour[0]>-1) CopyBufferToGrid(f,nut,buf_recv[0],0,0,0,ghost-1,m2-1,m3-1); if(pr_neighbour[1]>-1) 
CopyBufferToGrid(f,nut,buf_recv[1],mm1,0,0,m1-1,m2-1,m3-1); // exchanging in z-direction if(pr_neighbour[4]>-1) if(pr_neighbour[4]==rank) CopyGridToBuffer(f,nut,buf_recv[4],0,0,mm3,m1-1,m2-1,m3-1); else { CopyGridToBuffer(f,nut,buf_send[4],0,0,ghost,m1-1,m2-1,2*ghost-1); MPI_Isend(buf_send[4],buf_size[2],MPI_DOUBLE,pr_neighbour[4],tag+4,MPI_COMM_WORLD,&SendRequest[req_numS++]); MPI_Irecv(buf_recv[4],buf_size[2],MPI_DOUBLE,pr_neighbour[4],tag+5,MPI_COMM_WORLD,&RecvRequest[req_numR++]); } if(pr_neighbour[5]>-1) if(pr_neighbour[5]==rank) CopyGridToBuffer(f,nut,buf_recv[5],0,0,0,m1-1,m2-1,ghost-1); else { CopyGridToBuffer(f,nut,buf_send[5],0,0,n3,m1-1,m2-1,mm3-1); MPI_Isend(buf_send[5],buf_size[2],MPI_DOUBLE,pr_neighbour[5],tag+5,MPI_COMM_WORLD,&SendRequest[req_numS++]); MPI_Irecv(buf_recv[5],buf_size[2],MPI_DOUBLE,pr_neighbour[5],tag+4,MPI_COMM_WORLD,&RecvRequest[req_numR++]); } MPI_Waitall(req_numR,RecvRequest,statuses); if(statuses[0].MPI_ERROR) {putlog("bc:error during transfer=",numlog++); MPI_Error_string(statuses[0].MPI_ERROR,msg_err,&reslen); msg_err[reslen++] = ','; msg_err[reslen]= 0; putlog(msg_err,numlog++); } else numlog++; if(pr_neighbour[4]>-1) CopyBufferToGrid(f,nut,buf_recv[4],0,0,0,m1-1,m2-1,ghost-1); if(pr_neighbour[5]>-1) CopyBufferToGrid(f,nut,buf_recv[5],0,0,mm3,m1-1,m2-1,m3-1); // MPI_Barrier(MPI_COMM_WORLD); // MPI_Startall(req_numR,RecvRequest); // MPI_Iprobe(MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&flag,statuses); // if(flag==0) putlog("bc:error during transfer=",numlog++); // MPI_Testall(req_numR,RecvRequest,&flag,statuses); // MPI_Get_count(statuses,MPI_DOUBLE,&cnt); // MPI_Waitall(req_numR,RecvRequest,statuses); /*----------------------- filling of ghost nodes ------------------------------*/ for(i=0;i<m1;i++) for(j=0;j<m2;j++) for(k=0;k<m3;k++) { if(isType(node[i][k],NodeGhostFluid)) { r2=(r1=floor(refr_f[i][k]))+1; z2=(z1=floor(refz_f[i][k]))+1; for(l=0;l<=3;l++) f[l][i][j][k] = z[l]*( (refr_f[i][k]-r2)*(f[l][r1][j][z1]*(refz_f[i][k]-z2)-f[l][r1][j][z2]*(refz_f[i][k]-z1)) +(refr_f[i][k]-r1)*(f[l][r2][j][z2]*(refz_f[i][k]-z1)-f[l][r2][j][z1]*(refz_f[i][k]-z2)) ); nut[i][j][k] = (refr_f[i][k]-r2)*(nut[r1][j][z1]*(refz_f[i][k]-z2)-nut[r1][j][z2]*(refz_f[i][k]-z1)) + (refr_f[i][k]-r1)*(nut[r2][j][z2]*(refz_f[i][k]-z1)-nut[r2][j][z1]*(refz_f[i][k]-z2)); } if(isType(node[i][k],NodeGhostMagn)) { r1=floor(refr_m[i][k]+0.5); z1=floor(refz_m[i][k]+0.5); An = ( f[4][r1][j][z1]*(r1-i)+f[6][r1][j][z1]*(z1-k) )/ ( (r1-i)*(r1-i) + (z1-k)*(z1-k) ); /* for(l=4;l<=6;l++) f[l][i][j][k] = z[l]*f[l][r1][j][z1];*/ // if((r1-i)*(z1-k)) ztemp = z[5]; else ztemp = z[4]; ztemp = ((r1-i)*(z1-k)==0) ? z[5] : z[4]; ztemp = z[5]; f[4][i][j][k] = ztemp*f[4][r1][j][z1] + (z[4]-ztemp)*An*(r1-i); f[5][i][j][k] = z[5]*f[5][r1][j][z1] * (2*(rc-R)-(i-r1)*dx[0]) / (2*(rc-R)+(i-r1)*dx[0]); f[6][i][j][k] = ztemp*f[6][r1][j][z1] + (z[4]-ztemp)*An*(z1-k); } } return; }
void parallelComm::sendRecvPacketsV(std::vector<VPACKET> &sndPack, std::vector<VPACKET> &rcvPack)
{
  std::vector<int> scount(2*nsend);
  std::vector<int> rcount(2*nrecv);
  std::vector<MPI_Request> request(2*(nsend+nrecv));
  std::vector<MPI_Status> status(2*(nsend+nrecv));

  for (int i = 0; i < nsend; i++) {
    scount[2*i]   = sndPack[i].nints;
    scount[2*i+1] = sndPack[i].nreals;
  }

  int irnum = 0;
  for (int i = 0; i < nrecv; i++)
    MPI_Irecv(&(rcount[2*i]), 2, MPI_INT, rcvMap[i], 10, scomm, &request[irnum++]);
  for (int i = 0; i < nsend; i++)
    MPI_Isend(&(scount[2*i]), 2, MPI_INT, sndMap[i], 10, scomm, &request[irnum++]);
  MPI_Waitall(irnum, request.data(), status.data());

  for (int i = 0; i < nrecv; i++) {
    rcvPack[i].nints  = rcount[2*i];
    rcvPack[i].nreals = rcount[2*i+1];
  }

  irnum = 0;
  for (int i = 0; i < nrecv; i++) {
    if (rcvPack[i].nints > 0) {
      rcvPack[i].intData.resize(rcvPack[i].nints);
      MPI_Irecv(rcvPack[i].intData.data(), rcvPack[i].nints, MPI_INT,
                rcvMap[i], 10, scomm, &request[irnum++]);
    }
    if (rcvPack[i].nreals > 0) {
      rcvPack[i].realData.resize(rcvPack[i].nreals);
      MPI_Irecv(rcvPack[i].realData.data(), rcvPack[i].nreals, MPI_DOUBLE,
                rcvMap[i], 20, scomm, &request[irnum++]);
    }
  }
  for (int i = 0; i < nsend; i++) {
    if (sndPack[i].nints > 0) {
      MPI_Isend(sndPack[i].intData.data(), sndPack[i].nints, MPI_INT,
                sndMap[i], 10, scomm, &request[irnum++]);
    }
    if (sndPack[i].nreals > 0) {
      MPI_Isend(sndPack[i].realData.data(), sndPack[i].nreals, MPI_DOUBLE,
                sndMap[i], 20, scomm, &request[irnum++]);
    }
  }
  MPI_Waitall(irnum, request.data(), status.data());
}
int main(int argc, char *argv[])
{
  int rank;
  int size;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // Build a 4x3 grid of subgrids
  /*
    +-----+-----+-----+
    |  0  |  1  |  2  |
    |(0,0)|(0,1)|(0,2)|
    +-----+-----+-----+
    |  3  |  4  |  5  |
    |(1,0)|(1,1)|(1,2)|
    +-----+-----+-----+
    |  6  |  7  |  8  |
    |(2,0)|(2,1)|(2,2)|
    +-----+-----+-----+
    |  9  | 10  | 11  |
    |(3,0)|(3,1)|(3,2)|
    +-----+-----+-----+
  */
  MPI_Comm Comm2d;
  int ndim;
  int dim[2];
  int period[2]; // for periodic conditions
  int reorder;
  int coord[2];

  // Set up and build the Cartesian grid (assumes the job is run with 12 ranks)
  ndim = 2;
  dim[0] = 4;
  dim[1] = 3;
  period[0] = false;
  period[1] = false;
  reorder = true;
  MPI_Cart_create(MPI_COMM_WORLD, ndim, dim, period, reorder, &Comm2d);

  // reorder=true may renumber the ranks, so use the rank in Comm2d from here on
  MPI_Comm_rank(Comm2d, &rank);

  // Every processor prints its rank and coordinates
  MPI_Cart_coords(Comm2d, rank, 2, coord);
  printf("P:%2d My coordinates are %d %d\n", rank, coord[0], coord[1]);

  // On the root only: ask for the rank of the processor at coordinates (3,1)
  if (rank == 0) {
    int id; // the requested processor id
    coord[0] = 3;
    coord[1] = 1;
    MPI_Cart_rank(Comm2d, coord, &id);
    printf("The processor at coords (%d, %d) has rank %d\n", coord[0], coord[1], id);
  }

  // Every processor builds its neighbour map
  int nbrs[4];
  MPI_Cart_shift(Comm2d, 0, 1, &nbrs[UP], &nbrs[DOWN]);
  MPI_Cart_shift(Comm2d, 1, 1, &nbrs[LEFT], &nbrs[RIGHT]);

  // print the neighbours of one rank
  if (rank == 7) {
    printf("P:%2d has neighbours (u,d,l,r): %2d %2d %2d %2d\n",
           rank, nbrs[UP], nbrs[DOWN], nbrs[LEFT], nbrs[RIGHT]);
  }

  // if everything looks good up to here, perform a communication test.
  MPI_Barrier(MPI_COMM_WORLD);

  // Communication test: every processor sends 4 ints and receives 4 ints
  MPI_Request reqSendRecv[8];
  MPI_Status status[8];
  int out = rank;      // communicate the rank number
  int in[4] = {};      // empty array
  int tag = 2;         // tag

  for (int i = 0; i < 4; i++) { // following the neighbours' order!!
    // the neighbour ranks belong to Comm2d, so communicate on Comm2d
    MPI_Isend(&out,   1, MPI_INT, nbrs[i], tag, Comm2d, &reqSendRecv[i]);
    MPI_Irecv(&in[i], 1, MPI_INT, nbrs[i], tag, Comm2d, &reqSendRecv[i+4]);
  }
  MPI_Waitall(8, reqSendRecv, status);

  // print the communication output
  printf("P:%2d received from ngbr(u,d,l,r): %2d %2d %2d %2d\n",
         rank, in[UP], in[DOWN], in[LEFT], in[RIGHT]);

  MPI_Finalize();
  return 0;
}
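/* As an aside: on an MPI-3 implementation, the four Isend/Irecv pairs plus
 * MPI_Waitall in the test above can be collapsed into a single neighborhood
 * collective on the Cartesian communicator.  This is only a sketch under that
 * assumption (and assuming UP, DOWN, LEFT, RIGHT are defined as 0..3): for a
 * Cartesian topology, MPI_Neighbor_alltoall orders the neighbors per dimension
 * as -1 then +1, which is the order produced by the MPI_Cart_shift calls above. */
#include <mpi.h>

void exchange_ranks_with_neighbors(MPI_Comm cart2d, int myrank, int in[4])
{
  int out[4] = { myrank, myrank, myrank, myrank }; /* one copy per neighbour */
  MPI_Neighbor_alltoall(out, 1, MPI_INT, in, 1, MPI_INT, cart2d);
  /* entries for MPI_PROC_NULL neighbours (grid edges, period=false) are not written */
}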
int main( int argc, char *argv[] ) { int errs = 0; int rank, size, dest, source; int i, indices[40]; MPI_Aint extent; int *buf, *bufs[MAX_MSGS]; MPI_Comm comm; MPI_Datatype dtype; MPI_Request req[MAX_MSGS]; MTest_Init( &argc, &argv ); comm = MPI_COMM_WORLD; MPI_Comm_rank( comm, &rank ); MPI_Comm_size( comm, &size ); source = 0; dest = size - 1; /* Setup by creating a blocked datatype that is likely to be processed in a piecemeal fashion */ for (i=0; i<30; i++) { indices[i] = i*40; } /* 30 blocks of size 10 */ MPI_Type_create_indexed_block( 30, 10, indices, MPI_INT, &dtype ); MPI_Type_commit( &dtype ); /* Create the corresponding message buffers */ MPI_Type_extent( dtype, &extent ); for (i=0; i<MAX_MSGS; i++) { bufs[i] = (int *)malloc( extent ); if (!bufs[i]) { fprintf( stderr, "Unable to allocate buffer %d of size %ld\n", i, (long)extent ); MPI_Abort( MPI_COMM_WORLD, 1 ); } } buf = (int *)malloc( 10 * 30 * sizeof(int) ); MPI_Barrier( MPI_COMM_WORLD ); if (rank == dest) { MTestSleep( 2 ); for (i=0; i<MAX_MSGS; i++) { MPI_Recv( buf, 10*30, MPI_INT, source, i, comm, MPI_STATUS_IGNORE ); } } else if (rank == source ) { for (i=0; i<MAX_MSGS; i++) { MPI_Isend( bufs[i], 1, dtype, dest, i, comm, &req[i] ); } MPI_Waitall( MAX_MSGS, req, MPI_STATUSES_IGNORE ); } MPI_Type_free( &dtype ); MTest_Finalize( errs ); MPI_Finalize(); return 0; }
int main( int argc, char **argv ) { MPI_Comm comm; MPI_Request r[MAX_REQ]; MPI_Status s[MAX_REQ]; int msgsize, maxmsg, root, i, j, size, rank, err = 0, msgcnt, toterr; int *sbuf, *rbuf; MPI_Init( &argc, &argv ); comm = MPI_COMM_WORLD; MPI_Comm_size( comm, &size ); MPI_Comm_rank( comm, &rank ); if (size < 2) { printf( "This test requires at least 2 processors\n" ); MPI_Abort( comm, 1 ); } /* First, try large blocking sends to root */ root = 0; maxmsg = MAX_MSG; msgsize = 128; msgcnt = MAX_MSG_CNT; if (rank == root && verbose) printf( "Blocking sends: " ); while (msgsize <= maxmsg) { if (rank == root) { if (verbose) { printf( "%d ", msgsize ); fflush( stdout ); } rbuf = (int *)malloc( msgsize * sizeof(int) ); if (!rbuf) { printf( "Could not allocate %d words\n", msgsize ); MPI_Abort( comm, 1 ); } for (i=0; i<size; i++) { if (i == rank) continue; for (j=0; j<msgcnt; j++) { SetupRdata( rbuf, msgsize ); MPI_Recv( rbuf, msgsize, MPI_INT, i, 2*i, comm, s ); err += CheckData( rbuf, msgsize, 2*i, s ); } } free( rbuf ); } else { sbuf = (int *)malloc( msgsize * sizeof(int) ); if (!sbuf) { printf( "Could not allocate %d words\n", msgsize ); MPI_Abort( comm, 1 ); } SetupData( sbuf, msgsize, 2*rank ); for (j=0; j<msgcnt; j++) MPI_Send( sbuf, msgsize, MPI_INT, root, 2*rank, comm ); free( sbuf ); } msgsize *= 4; } if (rank == 0 && verbose) { printf( "\n" ); fflush( stdout ); } /* Next, try unexpected messages with Isends */ msgsize = 128; maxmsg = MAX_MSG; msgcnt = MAX_REQ; if (rank == root && verbose) printf( "Unexpected recvs: " ); while (msgsize <= maxmsg) { if (rank == root) { if (verbose) { printf( "%d ", msgsize ); fflush( stdout ); } rbuf = (int *)malloc( msgsize * sizeof(int) ); if (!rbuf) { printf( "Could not allocate %d words\n", msgsize ); MPI_Abort( comm, 1 ); } MPI_Barrier( comm ); for (i=0; i<size; i++) { if (i == rank) continue; for (j=0; j<msgcnt; j++) { SetupRdata( rbuf, msgsize ); MPI_Recv( rbuf, msgsize, MPI_INT, i, 2*i, comm, s ); err += CheckData( rbuf, msgsize, 2*i, s ); } } free( rbuf ); } else { sbuf = (int *)malloc( msgsize * sizeof(int) ); if (!sbuf) { printf( "Could not allocate %d words\n", msgsize ); MPI_Abort( comm, 1 ); } SetupData( sbuf, msgsize, 2*rank ); for (j=0; j<msgcnt; j++) { MPI_Isend( sbuf, msgsize, MPI_INT, root, 2*rank, comm, &r[j] ); } MPI_Barrier( comm ); MPI_Waitall( msgcnt, r, s ); free( sbuf ); } msgsize *= 4; } if (rank == 0 && verbose) { printf( "\n" ); fflush( stdout ); } /* Try large synchronous blocking sends to root */ root = 0; msgsize = 128; maxmsg = MAX_MSG; if (rank == root && verbose) printf( "Synchronous sends: " ); while (msgsize <= maxmsg) { if (rank == root) { if (verbose) { printf( "%d ", msgsize ); fflush( stdout ); } rbuf = (int *)malloc( msgsize * sizeof(int) ); if (!rbuf) { printf( "Could not allocate %d words\n", msgsize ); MPI_Abort( comm, 1 ); } for (i=0; i<size; i++) { if (i == rank) continue; for (j=0; j<msgcnt; j++) { SetupRdata( rbuf, msgsize ); MPI_Recv( rbuf, msgsize, MPI_INT, i, 2*i, comm, s ); err += CheckData( rbuf, msgsize, 2*i, s ); } } free( rbuf ); } else { sbuf = (int *)malloc( msgsize * sizeof(int) ); if (!sbuf) { printf( "Could not allocate %d words\n", msgsize ); MPI_Abort( comm, 1 ); } SetupData( sbuf, msgsize, 2*rank ); for (j=0; j<msgcnt; j++) MPI_Ssend( sbuf, msgsize, MPI_INT, root, 2*rank, comm ); free( sbuf ); } msgsize *= 4; } if (rank == 0 && verbose) { printf( "\n" ); fflush( stdout ); } MPI_Allreduce( &err, &toterr, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (rank == 0) { if (toterr == 0) printf( "No errors\n" 
); else printf( "!! found %d errors\n", toterr ); } if (toterr) { printf( "!! found %d errors on processor %d\n", err, rank ); } MPI_Finalize( ); return 0; }
int main (int argc, char **argv) { int nprocs = -1; int rank = -1; char processor_name[128]; int namelen = 128; int buf0[buf_size]; int buf1[buf_size]; MPI_Request aReq[2]; MPI_Status aStatus[2]; MPI_Status status; /* init */ MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &nprocs); MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Get_processor_name (processor_name, &namelen); printf ("(%d) is alive on %s\n", rank, processor_name); fflush (stdout); MPI_Barrier (MPI_COMM_WORLD); if (nprocs < 2) { printf ("not enough tasks\n"); } else { if (rank == 0) { memset (buf0, 0, buf_size); MPI_Send_init (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[0]); MPI_Recv_init (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[1]); MPI_Start (&aReq[0]); MPI_Start (&aReq[1]); MPI_Waitall (2, aReq, aStatus); memset (buf0, 1, buf_size); MPI_Startall (2, aReq); MPI_Waitall (2, aReq, aStatus); } else if (rank == 1) { memset (buf1, 1, buf_size); MPI_Recv_init (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[0]); MPI_Send_init (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[1]); MPI_Start (&aReq[0]); MPI_Start (&aReq[1]); MPI_Waitall (2, aReq, aStatus); memset (buf1, 0, buf_size); MPI_Startall (2, aReq); MPI_Waitall (2, aReq, aStatus); } } MPI_Barrier (MPI_COMM_WORLD); MPI_Request_free (&aReq[0]); MPI_Request_free (&aReq[1]); MPI_Finalize (); printf ("(%d) Finished normally\n", rank); }
double find_bibw(int size, int num_pairs, char *s_buf, char *r_buf) { /*This function is the bandwidth test that was previously part of main in osu_bibw, with the additional modification of being able to stream multiple stream multiple processors per node. As of the InfiniPath 1.3 release the code can also dynamically determine which processes are on which node and set things up appropriately.*/ double t_start = 0.0, t_end = 0.0, t = 0.0, max_time = 0.0, min_time = 0.0; double seconds_per_message_size, sum_loops, dloops; int i, j, myid, target, skip, loops, min_loops, max_loops, window_size; for (i = 0; i < size; i++) { s_buf[i] = 'a'; r_buf[i] = 'b'; } if (size < large_message_size) { skip = skip_small; min_loops = min_loops_small; max_loops = max_loops_small; window_size = window_size_small; seconds_per_message_size = seconds_per_message_size_small; } else { skip = skip_large; min_loops = min_loops_large; max_loops = max_loops_large; window_size = window_size_large; seconds_per_message_size = seconds_per_message_size_large; } MPI_Comm_rank (MPI_COMM_WORLD, &myid); MPI_Barrier (MPI_COMM_WORLD); if (pair_list[myid].sender == myid) { target = pair_list[myid].receiver; for (i = 0; i < max_loops + skip; i++) { if (i == skip) { MPI_Barrier (MPI_COMM_WORLD); t_start = MPI_Wtime(); } for (j = 0; j < window_size; j++) { MPI_Irecv (r_buf, size, MPI_CHAR, target, TAG_DATA, MPI_COMM_WORLD, recv_request + j); } for (j = 0; j < window_size; j++) { MPI_Isend (s_buf, size, MPI_CHAR, target, TAG_DATA, MPI_COMM_WORLD, send_request + j); } MPI_Waitall (window_size, send_request, reqstat); MPI_Waitall (window_size, recv_request, reqstat); MPI_Recv (r_buf, 4, MPI_CHAR, target, MPI_ANY_TAG, MPI_COMM_WORLD, &reqstat[0]); if (reqstat[0].MPI_TAG == TAG_DONE) { t_end = MPI_Wtime(); i++; break; } } if (t_end == 0.0) { t_end = MPI_Wtime(); } loops = i - skip; t = t_end - t_start; } else if (pair_list[myid].receiver == myid) { int tag = TAG_SKIP; target = pair_list[myid].sender; for (i = 0; i < max_loops + skip; i++) { if (i == skip) { tag = TAG_LOOP; MPI_Barrier (MPI_COMM_WORLD); t_start = MPI_Wtime(); } for (j = 0; j < window_size; j++) { MPI_Isend (s_buf, size, MPI_CHAR, target, TAG_DATA, MPI_COMM_WORLD, send_request + j); } for (j = 0; j < window_size; j++) { MPI_Irecv (r_buf, size, MPI_CHAR, target, TAG_DATA, MPI_COMM_WORLD, recv_request + j); } MPI_Waitall (window_size, send_request, reqstat); MPI_Waitall (window_size, recv_request, reqstat); if (tag == TAG_LOOP && (i - skip) >= (min_loops - 1) && MPI_Wtime() - t_start >= seconds_per_message_size) { MPI_Send (s_buf, 4, MPI_CHAR, target, TAG_DONE, MPI_COMM_WORLD); i++; break; } else { MPI_Send (s_buf, 4, MPI_CHAR, target, tag, MPI_COMM_WORLD); } } loops = i - skip; } else { MPI_Barrier(MPI_COMM_WORLD); } MPI_Reduce (&t, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, mpi_comm_sender); MPI_Reduce (&t, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, mpi_comm_sender); MPI_Reduce (&loops, &max_loops, 1, MPI_INTEGER, MPI_MAX, 0, mpi_comm_sender); MPI_Reduce (&loops, &min_loops, 1, MPI_INTEGER, MPI_MIN, 0, mpi_comm_sender); dloops = (double) loops; MPI_Reduce (&dloops, &sum_loops, 1, MPI_DOUBLE, MPI_SUM, 0, mpi_comm_sender); if (myid==0) { double mbytes = ( (size * 2.0) / (1000 * 1000) ) * sum_loops * window_size; double bw = mbytes / max_time; if (debug) { printf("%d bytes, %.2f MB/s, %d to %d loops (range %.2f%%), " "%.3f to %.3f secs (range %.2f%%)\n", size, bw, min_loops, max_loops, 100.0 * ((double) max_loops / (double) min_loops - 1), min_time, max_time, 100.0 * (max_time / 
min_time - 1)); } return bw; } return 0; }
/*! \brief Called by PME-only ranks to receive coefficients and coordinates * * \param[in,out] pme_pp PME-PP communication structure. * \param[out] natoms Number of received atoms. * \param[out] box System box, if received. * \param[out] maxshift_x Maximum shift in X direction, if received. * \param[out] maxshift_y Maximum shift in Y direction, if received. * \param[out] lambda_q Free-energy lambda for electrostatics, if received. * \param[out] lambda_lj Free-energy lambda for Lennard-Jones, if received. * \param[out] bEnerVir Set to true if this is an energy/virial calculation step, otherwise set to false. * \param[out] step MD integration step number. * \param[out] grid_size PME grid size, if received. * \param[out] ewaldcoeff_q Ewald cut-off parameter for electrostatics, if received. * \param[out] ewaldcoeff_lj Ewald cut-off parameter for Lennard-Jones, if received. * \param[out] atomSetChanged Set to true only if the local domain atom data (charges/coefficients) * has been received (after DD) and should be reinitialized. Otherwise not changed. * * \retval pmerecvqxX All parameters were set, chargeA and chargeB can be NULL. * \retval pmerecvqxFINISH No parameters were set. * \retval pmerecvqxSWITCHGRID Only grid_size and *ewaldcoeff were set. * \retval pmerecvqxRESETCOUNTERS *step was set. */ static int gmx_pme_recv_coeffs_coords(gmx_pme_pp *pme_pp, int *natoms, matrix box, int *maxshift_x, int *maxshift_y, real *lambda_q, real *lambda_lj, gmx_bool *bEnerVir, int64_t *step, ivec *grid_size, real *ewaldcoeff_q, real *ewaldcoeff_lj, bool *atomSetChanged) { int status = -1; int nat = 0; #if GMX_MPI unsigned int flags = 0; int messages = 0; do { gmx_pme_comm_n_box_t cnb; cnb.flags = 0; /* Receive the send count, box and time step from the peer PP node */ MPI_Recv(&cnb, sizeof(cnb), MPI_BYTE, pme_pp->peerRankId, eCommType_CNB, pme_pp->mpi_comm_mysim, MPI_STATUS_IGNORE); /* We accumulate all received flags */ flags |= cnb.flags; *step = cnb.step; if (debug) { fprintf(debug, "PME only rank receiving:%s%s%s%s%s\n", (cnb.flags & PP_PME_CHARGE) ? " charges" : "", (cnb.flags & PP_PME_COORD ) ? " coordinates" : "", (cnb.flags & PP_PME_FINISH) ? " finish" : "", (cnb.flags & PP_PME_SWITCHGRID) ? " switch grid" : "", (cnb.flags & PP_PME_RESETCOUNTERS) ? 
" reset counters" : ""); } if (cnb.flags & PP_PME_FINISH) { status = pmerecvqxFINISH; } if (cnb.flags & PP_PME_SWITCHGRID) { /* Special case, receive the new parameters and return */ copy_ivec(cnb.grid_size, *grid_size); *ewaldcoeff_q = cnb.ewaldcoeff_q; *ewaldcoeff_lj = cnb.ewaldcoeff_lj; status = pmerecvqxSWITCHGRID; } if (cnb.flags & PP_PME_RESETCOUNTERS) { /* Special case, receive the step (set above) and return */ status = pmerecvqxRESETCOUNTERS; } if (cnb.flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA)) { *atomSetChanged = true; /* Receive the send counts from the other PP nodes */ for (auto &sender : pme_pp->ppRanks) { if (sender.rankId == pme_pp->peerRankId) { sender.numAtoms = cnb.natoms; } else { MPI_Irecv(&sender.numAtoms, sizeof(sender.numAtoms), MPI_BYTE, sender.rankId, eCommType_CNB, pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]); } } MPI_Waitall(messages, pme_pp->req.data(), pme_pp->stat.data()); messages = 0; nat = 0; for (const auto &sender : pme_pp->ppRanks) { nat += sender.numAtoms; } if (cnb.flags & PP_PME_CHARGE) { pme_pp->chargeA.resizeWithPadding(nat); } if (cnb.flags & PP_PME_CHARGEB) { pme_pp->chargeB.resize(nat); } if (cnb.flags & PP_PME_SQRTC6) { pme_pp->sqrt_c6A.resize(nat); } if (cnb.flags & PP_PME_SQRTC6B) { pme_pp->sqrt_c6B.resize(nat); } if (cnb.flags & PP_PME_SIGMA) { pme_pp->sigmaA.resize(nat); } if (cnb.flags & PP_PME_SIGMAB) { pme_pp->sigmaB.resize(nat); } pme_pp->x.resizeWithPadding(nat); pme_pp->f.resize(nat); /* maxshift is sent when the charges are sent */ *maxshift_x = cnb.maxshift_x; *maxshift_y = cnb.maxshift_y; /* Receive the charges in place */ for (int q = 0; q < eCommType_NR; q++) { real *bufferPtr; if (!(cnb.flags & (PP_PME_CHARGE<<q))) { continue; } switch (q) { case eCommType_ChargeA: bufferPtr = pme_pp->chargeA.data(); break; case eCommType_ChargeB: bufferPtr = pme_pp->chargeB.data(); break; case eCommType_SQRTC6A: bufferPtr = pme_pp->sqrt_c6A.data(); break; case eCommType_SQRTC6B: bufferPtr = pme_pp->sqrt_c6B.data(); break; case eCommType_SigmaA: bufferPtr = pme_pp->sigmaA.data(); break; case eCommType_SigmaB: bufferPtr = pme_pp->sigmaB.data(); break; default: gmx_incons("Wrong eCommType"); } nat = 0; for (const auto &sender : pme_pp->ppRanks) { if (sender.numAtoms > 0) { MPI_Irecv(bufferPtr+nat, sender.numAtoms*sizeof(real), MPI_BYTE, sender.rankId, q, pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]); nat += sender.numAtoms; if (debug) { fprintf(debug, "Received from PP rank %d: %d %s\n", sender.rankId, sender.numAtoms, (q == eCommType_ChargeA || q == eCommType_ChargeB) ? 
"charges" : "params"); } } } } } if (cnb.flags & PP_PME_COORD) { /* The box, FE flag and lambda are sent along with the coordinates * */ copy_mat(cnb.box, box); *lambda_q = cnb.lambda_q; *lambda_lj = cnb.lambda_lj; *bEnerVir = ((cnb.flags & PP_PME_ENER_VIR) != 0u); *step = cnb.step; /* Receive the coordinates in place */ nat = 0; for (const auto &sender : pme_pp->ppRanks) { if (sender.numAtoms > 0) { MPI_Irecv(pme_pp->x[nat], sender.numAtoms*sizeof(rvec), MPI_BYTE, sender.rankId, eCommType_COORD, pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]); nat += sender.numAtoms; if (debug) { fprintf(debug, "Received from PP rank %d: %d " "coordinates\n", sender.rankId, sender.numAtoms); } } } status = pmerecvqxX; } /* Wait for the coordinates and/or charges to arrive */ MPI_Waitall(messages, pme_pp->req.data(), pme_pp->stat.data()); messages = 0; } while (status == -1); #else GMX_UNUSED_VALUE(pme_pp); GMX_UNUSED_VALUE(box); GMX_UNUSED_VALUE(maxshift_x); GMX_UNUSED_VALUE(maxshift_y); GMX_UNUSED_VALUE(lambda_q); GMX_UNUSED_VALUE(lambda_lj); GMX_UNUSED_VALUE(bEnerVir); GMX_UNUSED_VALUE(step); GMX_UNUSED_VALUE(grid_size); GMX_UNUSED_VALUE(ewaldcoeff_q); GMX_UNUSED_VALUE(ewaldcoeff_lj); GMX_UNUSED_VALUE(atomSetChanged); status = pmerecvqxX; #endif if (status == pmerecvqxX) { *natoms = nat; } return status; }
void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack) { int i; int *sint,*sreal,*rint,*rreal; int tag,irnum; MPI_Request *request; MPI_Status *status; // sint=(int *)malloc(sizeof(int)*numprocs); sreal=(int *) malloc(sizeof(int)*numprocs); rint=(int *)malloc(sizeof(int)*numprocs); rreal=(int *) malloc(sizeof(int)*numprocs); request=(MPI_Request *) malloc(sizeof(MPI_Request)*4*numprocs); status=(MPI_Status *) malloc(sizeof(MPI_Status)*4*numprocs); // for(i=0;i<numprocs;i++){ sint[i]=sndPack[i].nints; sreal[i]=sndPack[i].nreals; } // MPI_Alltoall(sint,1,MPI_INT,rint,1,MPI_INT,scomm); MPI_Alltoall(sreal,1,MPI_INT,rreal,1,MPI_INT,scomm); // for(i=0;i<numprocs;i++) { rcvPack[i].nints=rint[i]; rcvPack[i].nreals=rreal[i]; } // irnum=0; for(i=0;i<numprocs;i++) { if (rcvPack[i].nints > 0) { tag=1; rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints, MPI_INT,i, tag,scomm,&request[irnum++]); } if (rcvPack[i].nreals > 0) { tag=2; rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals); MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals, MPI_DOUBLE,i, tag,scomm,&request[irnum++]); } } for(i=0;i<numprocs;i++) { if (sndPack[i].nints > 0){ tag=1; MPI_Isend(sndPack[i].intData,sndPack[i].nints, MPI_INT,i, tag,scomm,&request[irnum++]); } if (sndPack[i].nreals > 0){ tag=2; MPI_Isend(sndPack[i].realData,sndPack[i].nreals, MPI_DOUBLE,i, tag,scomm,&request[irnum++]); } } MPI_Pcontrol(1, "tioga_pc_waitall"); MPI_Waitall(irnum,request,status); MPI_Pcontrol(-1, "tioga_pc_waitall"); free(sint); free(sreal); free(rint); free(rreal); free(request); free(status); }
/*! \brief Send the PME mesh force, virial and energy to the PP-only ranks. */ static void gmx_pme_send_force_vir_ener(gmx_pme_pp *pme_pp, const rvec *f, matrix vir_q, real energy_q, matrix vir_lj, real energy_lj, real dvdlambda_q, real dvdlambda_lj, float cycles) { #if GMX_MPI gmx_pme_comm_vir_ene_t cve; int messages, ind_start, ind_end; cve.cycles = cycles; /* Now the evaluated forces have to be transferred to the PP nodes */ messages = 0; ind_end = 0; for (const auto &receiver : pme_pp->ppRanks) { ind_start = ind_end; ind_end = ind_start + receiver.numAtoms; if (MPI_Isend(const_cast<void *>(static_cast<const void *>(f[ind_start])), (ind_end-ind_start)*sizeof(rvec), MPI_BYTE, receiver.rankId, 0, pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]) != 0) { gmx_comm("MPI_Isend failed in do_pmeonly"); } } /* send virial and energy to our last PP node */ copy_mat(vir_q, cve.vir_q); copy_mat(vir_lj, cve.vir_lj); cve.energy_q = energy_q; cve.energy_lj = energy_lj; cve.dvdlambda_q = dvdlambda_q; cve.dvdlambda_lj = dvdlambda_lj; /* check for the signals to send back to a PP node */ cve.stop_cond = gmx_get_stop_condition(); cve.cycles = cycles; if (debug) { fprintf(debug, "PME rank sending to PP rank %d: virial and energy\n", pme_pp->peerRankId); } MPI_Isend(&cve, sizeof(cve), MPI_BYTE, pme_pp->peerRankId, 1, pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]); /* Wait for the forces to arrive */ MPI_Waitall(messages, pme_pp->req.data(), pme_pp->stat.data()); #else gmx_call("MPI not enabled"); GMX_UNUSED_VALUE(pme_pp); GMX_UNUSED_VALUE(f); GMX_UNUSED_VALUE(vir_q); GMX_UNUSED_VALUE(energy_q); GMX_UNUSED_VALUE(vir_lj); GMX_UNUSED_VALUE(energy_lj); GMX_UNUSED_VALUE(dvdlambda_q); GMX_UNUSED_VALUE(dvdlambda_lj); GMX_UNUSED_VALUE(cycles); #endif }
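/* Sketch of the technique the function above uses for its gmx_pme_comm_vir_ene_t message: shipping a plain C struct as sizeof(struct) bytes of MPI_BYTE. The vir_ene_msg type below is a made-up stand-in, not the real GROMACS struct; the layout must be identical on sender and receiver. */
#include <mpi.h>
#include <stdio.h>
#include <string.h>

typedef struct {            /* hypothetical message type */
    double vir[3][3];
    double energy;
    double dvdlambda;
    float  cycles;
} vir_ene_msg;

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 2) { MPI_Finalize(); return 0; }

    vir_ene_msg msg;
    if (rank == 0) {
        memset(&msg, 0, sizeof(msg));
        msg.energy = 42.0;
        MPI_Request req;
        /* ship the whole struct as raw bytes */
        MPI_Isend(&msg, sizeof(msg), MPI_BYTE, 1, 1, MPI_COMM_WORLD, &req);
        MPI_Wait(&req, MPI_STATUS_IGNORE);
    } else if (rank == 1) {
        MPI_Recv(&msg, sizeof(msg), MPI_BYTE, 0, 1, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);
        printf("received energy %.1f\n", msg.energy);
    }
    MPI_Finalize();
    return 0;
}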
void parallelComm::recvPacketsV(void) { MPI_Waitall(nwait,reqs.data(),stats.data()); }
void _amps_wait_exchange(amps_Handle handle) { int i; int num; num = handle -> package -> num_send + handle -> package -> num_recv; if(num) { if(handle -> package -> num_recv) { for(i = 0; i < handle -> package -> num_recv; i++) { AMPS_CLEAR_INVOICE(handle -> package -> recv_invoices[i]); } } MPI_Waitall(num, handle -> package -> recv_requests, handle -> package -> status); } #ifdef AMPS_MPI_PACKAGE_LOWSTORAGE /* Needed by the DEC's; need better memory allocation strategy */ /* Need to uncommit packages when not in use */ /* amps_Commit followed by amps_UnCommit ????? */ if(handle -> package -> commited) { for(i = 0; i < handle -> package -> num_recv; i++) { if( handle -> package -> recv_invoices[i] -> mpi_type != MPI_DATATYPE_NULL ) { MPI_Type_free(&(handle -> package -> recv_invoices[i] -> mpi_type)); } MPI_Request_free(&(handle -> package -> recv_requests[i])); } for(i = 0; i < handle -> package -> num_send; i++) { if( handle -> package -> send_invoices[i] -> mpi_type != MPI_DATATYPE_NULL ) { MPI_Type_free(&handle -> package -> send_invoices[i] -> mpi_type); } MPI_Request_free(&(handle -> package -> send_requests[i])); } if(handle -> package -> recv_requests) { free(handle -> package -> recv_requests); handle -> package -> recv_requests = NULL; } if(handle -> package -> status) { free(handle -> package -> status); handle -> package -> status = NULL; } handle -> package -> commited = FALSE; } #endif }
void parallelComm::sendRecvPacketsCheck(PACKET *sndPack,PACKET *rcvPack) { int i; int *scount,*rcount; int tag,irnum; MPI_Request *request; MPI_Status *status; // scount=(int *)malloc(2*sizeof(int)*nsend); rcount=(int *) malloc(2*sizeof(int)*nrecv); request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv)); status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv)); // for(i=0;i<nsend;i++){ scount[2*i]=sndPack[i].nints; scount[2*i+1]=sndPack[i].nreals; } // irnum=0; tag=1; // for(i=0;i<nrecv;i++) MPI_Irecv(&(rcount[2*i]),2,MPI_INT,rcvMap[i],tag,scomm,&request[irnum++]); // for(i=0;i<nsend;i++) MPI_Isend(&(scount[2*i]),2,MPI_INT,sndMap[i],tag,scomm,&request[irnum++]); // MPI_Waitall(irnum,request,status); for(i=0;i<nrecv;i++) { rcvPack[i].nints=rcount[2*i]; rcvPack[i].nreals=rcount[2*i+1]; } //for(i=0;i<nsend;i++) // { // printf("%d sending %d to %d\n",myid,sndPack[i].nints,sndMap[i]); // } //for(i=0;i<nrecv;i++) // { // printf("%d receiving %d from %d\n",myid,rcvPack[i].nints,rcvMap[i]); // } // irnum=0; for(i=0;i<nrecv;i++) { if (rcvPack[i].nints > 0) { tag=1; rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints, MPI_INT,rcvMap[i], tag,scomm,&request[irnum++]); } if (rcvPack[i].nreals > 0 ) { tag=2; rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals); MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals, MPI_DOUBLE,rcvMap[i], tag,scomm,&request[irnum++]); } } // for(i=0;i<nsend;i++) { if (sndPack[i].nints > 0){ tag=1; MPI_Isend(sndPack[i].intData,sndPack[i].nints, MPI_INT,sndMap[i], tag,scomm,&request[irnum++]); } if (sndPack[i].nreals > 0){ tag=2; MPI_Isend(sndPack[i].realData,sndPack[i].nreals, MPI_DOUBLE,sndMap[i], tag,scomm,&request[irnum++]); } } MPI_Waitall(irnum,request,status); // free(scount); free(rcount); free(request); free(status); }
int main(int argc, char **argv) { double *x, *y; double mySUMx, mySUMy, mySUMxy, mySUMxx, SUMx, SUMy, SUMxy, SUMxx, SUMres, res, slope, y_intercept, y_estimate; int i,j,n,myid,numprocs,naverage,nremain,mypoints,ishift; int new_sleep (int *seconds); int std_sleep = 3; MPI_Status istatus; MPI_Status status[20]; FILE *infile; MPI_Request request[20]; infile = fopen("xydata", "r"); if (infile == NULL) printf("error opening file\n"); MPI_Init(&argc, &argv); MPI_Comm_rank (MPI_COMM_WORLD, &myid); MPI_Comm_size (MPI_COMM_WORLD, &numprocs); /* ---------------------------------------------------------- * Step 1: Process 0 reads data and sends the value of n * ---------------------------------------------------------- */ if (myid == 0) { printf ("Number of processes used: %d\n", numprocs); printf ("-------------------------------------\n"); printf ("The x coordinates on worker processes:\n"); /* this call is used to achieve a consistent output format */ new_sleep (&std_sleep); fscanf (infile, "%d", &n); x = (double *) malloc (n*sizeof(double)); y = (double *) malloc (n*sizeof(double)); for (i=0; i<n; i++) fscanf (infile, "%lf %lf", &x[i], &y[i]); for (i=1; i<numprocs; i++) MPI_Isend (&n, 1, MPI_INT, i, 10, MPI_COMM_WORLD, &request[i]); MPI_Waitall(numprocs-1, &request[1], status); } else { MPI_Irecv (&n, 1, MPI_INT, 0, 10, MPI_COMM_WORLD, &request[myid]); // Should wait before allocating mem MPI_Wait(&request[myid], &istatus); x = (double *) malloc (n*sizeof(double)); y = (double *) malloc (n*sizeof(double)); } /* ---------------------------------------------------------- */ naverage = n/numprocs; nremain = n % numprocs; /* ---------------------------------------------------------- * Step 2: Process 0 sends subsets of x and y ! * ---------------------------------------------------------- */ if (myid == 0) { ishift = 0; for (i=1; i<numprocs; i++) { if (i<nremain) { ishift += (naverage+1); mypoints = naverage+1; } else { ishift += naverage; mypoints = naverage; } if (i == nremain) ishift++; //ishift = i*naverage; //mypoints = (i < numprocs -1) ? naverage : naverage + nremain; MPI_Isend (&ishift, 1, MPI_INT, i, 1, MPI_COMM_WORLD, &request[0]); MPI_Isend (&mypoints, 1, MPI_INT, i, 2, MPI_COMM_WORLD, &request[1]); MPI_Isend (&x[ishift], mypoints, MPI_DOUBLE, i, 3, MPI_COMM_WORLD, &request[2]); MPI_Isend (&y[ishift], mypoints, MPI_DOUBLE, i, 4, MPI_COMM_WORLD, &request[3]); MPI_Waitall(4, request, status); } } else { /* ---------------the other processes receive---------------- */ MPI_Irecv (&ishift, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &request[0]); MPI_Irecv (&mypoints, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, &request[1]); MPI_Waitall(2, request, status); // ishift & mypoints var dependency MPI_Irecv (&x[ishift], mypoints, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD, &request[0]); MPI_Irecv (&y[ishift], mypoints, MPI_DOUBLE, 0, 4, MPI_COMM_WORLD, &request[1]); MPI_Wait(&request[0], &istatus); printf ("id %d: ", myid); for (i=0; i<n; i++) printf("%4.2lf ", x[i]); printf ("\n"); MPI_Wait(&request[1], &istatus); /* ---------------------------------------------------------- */ } /* ---------------------------------------------------------- * Step 3: Each process calculates its partial sum * ---------------------------------------------------------- */ mySUMx = 0; mySUMy = 0; mySUMxy = 0; mySUMxx = 0; if (myid == 0) { ishift = 0; mypoints = (nremain>0) ? 
(naverage+1) : naverage; } for (j=0; j<mypoints; j++) { mySUMx = mySUMx + x[ishift+j]; mySUMy = mySUMy + y[ishift+j]; mySUMxy = mySUMxy + x[ishift+j]*y[ishift+j]; mySUMxx = mySUMxx + x[ishift+j]*x[ishift+j]; } /* ---------------------------------------------------------- * Step 4: Process 0 receives partial sums from the others * ---------------------------------------------------------- */ if (myid != 0) { MPI_Isend (&mySUMx, 1, MPI_DOUBLE, 0, 5, MPI_COMM_WORLD, &request[0]); MPI_Isend (&mySUMy, 1, MPI_DOUBLE, 0, 6, MPI_COMM_WORLD, &request[1]); MPI_Isend (&mySUMxy,1, MPI_DOUBLE, 0, 7, MPI_COMM_WORLD, &request[2]); MPI_Isend (&mySUMxx,1, MPI_DOUBLE, 0, 8, MPI_COMM_WORLD, &request[3]); MPI_Waitall(4, request, status); // We should do Waitall for the workers (myid!=0) // before MPI_Finalize } else { SUMx = mySUMx; SUMy = mySUMy; SUMxy = mySUMxy; SUMxx = mySUMxx; for (i=1; i<numprocs; i++) { MPI_Irecv (&mySUMx, 1, MPI_DOUBLE, i, 5, MPI_COMM_WORLD, &request[0]); MPI_Irecv (&mySUMy, 1, MPI_DOUBLE, i, 6, MPI_COMM_WORLD, &request[1]); MPI_Irecv (&mySUMxy,1, MPI_DOUBLE, i, 7, MPI_COMM_WORLD, &request[2]); MPI_Irecv (&mySUMxx,1, MPI_DOUBLE, i, 8, MPI_COMM_WORLD, &request[3]); MPI_Waitall(4, request, status); SUMx = SUMx + mySUMx; SUMy = SUMy + mySUMy; SUMxy = SUMxy + mySUMxy; SUMxx = SUMxx + mySUMxx; } } /* ---------------------------------------------------------- * Step 5: Process 0 does the final steps * ---------------------------------------------------------- */ if (myid == 0) { slope = ( SUMx*SUMy - n*SUMxy ) / ( SUMx*SUMx - n*SUMxx ); y_intercept = ( SUMy - slope*SUMx ) / n; /* this call is used to achieve a consistent output format */ new_sleep (&std_sleep); printf ("\n"); printf ("The linear equation that best fits the given data:\n"); printf (" y = %6.2lfx + %6.2lf\n", slope, y_intercept); printf ("--------------------------------------------------\n"); printf (" Original (x,y) Estimated y Residual\n"); printf ("--------------------------------------------------\n"); SUMres = 0; for (i=0; i<n; i++) { y_estimate = slope*x[i] + y_intercept; res = y[i] - y_estimate; SUMres = SUMres + res*res; printf (" (%6.2lf %6.2lf) %6.2lf %6.2lf\n", x[i], y[i], y_estimate, res); } printf("--------------------------------------------------\n"); printf("Residual sum = %6.2lf\n", SUMres); } /* ---------------------------------------------------------- */ MPI_Finalize(); }
void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack) { int i; int *scount,*rcount; int tag,irnum; MPI_Request *request; MPI_Status *status; // scount=(int *)malloc(2*sizeof(int)*nsend); rcount=(int *) malloc(2*sizeof(int)*nrecv); request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv)); status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv)); // for(i=0;i<nsend;i++){ scount[2*i]=sndPack[i].nints; scount[2*i+1]=sndPack[i].nreals; } // irnum=0; tag=10; // for(i=0;i<nrecv;i++) MPI_Irecv(&(rcount[2*i]),2,MPI_INT,rcvMap[i],tag,scomm,&request[irnum++]); // for(i=0;i<nsend;i++) MPI_Isend(&(scount[2*i]),2,MPI_INT,sndMap[i],tag,scomm,&request[irnum++]); // MPI_Waitall(irnum,request,status); for(i=0;i<nrecv;i++) { rcvPack[i].nints=rcount[2*i]; rcvPack[i].nreals=rcount[2*i+1]; } // irnum=0; for(i=0;i<nrecv;i++) { if (rcvPack[i].nints > 0) { tag=10; rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints, MPI_INT,rcvMap[i], tag,scomm,&request[irnum++]); } if (rcvPack[i].nreals > 0) { tag=20; rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals); MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals, MPI_DOUBLE,rcvMap[i], tag,scomm,&request[irnum++]); } } // for(i=0;i<nsend;i++) { if (sndPack[i].nints > 0){ tag=10; MPI_Isend(sndPack[i].intData,sndPack[i].nints, MPI_INT,sndMap[i], tag,scomm,&request[irnum++]); } if (sndPack[i].nreals > 0){ tag=20; MPI_Isend(sndPack[i].realData,sndPack[i].nreals, MPI_DOUBLE,sndMap[i], tag,scomm,&request[irnum++]); } } MPI_Pcontrol(1, "tioga_pc_waitall"); MPI_Waitall(irnum,request,status); MPI_Pcontrol(-1, "tioga_pc_waitall"); // free(scount); free(rcount); free(request); free(status); }
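/* A minimal sketch of the two-phase exchange used by the packet routines above: phase 1 swaps message sizes, phase 2 allocates the receive buffers and swaps the variable-length payloads, with one MPI_Waitall per phase. The toy ring neighbours stand in for the real sndMap/rcvMap lists built elsewhere. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int right = (rank + 1) % size;
    int left  = (rank - 1 + size) % size;

    /* Phase 1: tell the right neighbour how many ints we will send. */
    int sendCount = rank + 1, recvCount = 0;
    MPI_Request req[2];
    MPI_Irecv(&recvCount, 1, MPI_INT, left,  1, MPI_COMM_WORLD, &req[0]);
    MPI_Isend(&sendCount, 1, MPI_INT, right, 1, MPI_COMM_WORLD, &req[1]);
    MPI_Waitall(2, req, MPI_STATUSES_IGNORE);

    /* Phase 2: allocate the receive buffer now that its size is known. */
    int *sendBuf = (int *) malloc(sendCount * sizeof(int));
    int *recvBuf = (int *) malloc(recvCount * sizeof(int));
    for (int i = 0; i < sendCount; i++) sendBuf[i] = rank;
    MPI_Irecv(recvBuf, recvCount, MPI_INT, left,  2, MPI_COMM_WORLD, &req[0]);
    MPI_Isend(sendBuf, sendCount, MPI_INT, right, 2, MPI_COMM_WORLD, &req[1]);
    MPI_Waitall(2, req, MPI_STATUSES_IGNORE);

    free(sendBuf);
    free(recvBuf);
    MPI_Finalize();
    return 0;
}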
int main(int argc, char **argv) { int *buf, i, rank, nints, len; char *filename, *tmp; int errs = 0, toterrs; MPI_File fh; MPI_Status status[NR_NBOPS]; MPI_Request request[NR_NBOPS]; int errcode = 0; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!rank) { i = 1; while ((i < argc) && strcmp("-fname", *argv)) { i++; argv++; } if (i >= argc) { fprintf(stderr, "\n*# Usage: async -fname filename\n\n"); MPI_Abort(MPI_COMM_WORLD, 1); } argv++; len = strlen(*argv); filename = (char *) malloc(len + 10); strcpy(filename, *argv); MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(filename, len + 10, MPI_CHAR, 0, MPI_COMM_WORLD); } else { MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); filename = (char *) malloc(len + 10); MPI_Bcast(filename, len + 10, MPI_CHAR, 0, MPI_COMM_WORLD); } buf = (int *) malloc(SIZE); nints = SIZE / sizeof(int); for (i = 0; i < nints; i++) buf[i] = rank * 100000 + i; /* each process opens a separate file called filename.'myrank' */ tmp = (char *) malloc(len + 10); strcpy(tmp, filename); sprintf(filename, "%s.%d", tmp, rank); errcode = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); if (errcode != MPI_SUCCESS) { handle_error(errcode, "MPI_File_open"); } errcode = MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL); if (errcode != MPI_SUCCESS) { handle_error(errcode, "MPI_File_set_view"); } for (i = 0; i < NR_NBOPS; i++) { errcode = MPI_File_iwrite_at(fh, nints / NR_NBOPS * i, buf + (nints / NR_NBOPS * i), nints / NR_NBOPS, MPI_INT, &(request[i])); if (errcode != MPI_SUCCESS) { handle_error(errcode, "MPI_File_iwrite"); } } MPI_Waitall(NR_NBOPS, request, status); MPI_File_close(&fh); /* reopen the file and read the data back */ for (i = 0; i < nints; i++) buf[i] = 0; errcode = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); if (errcode != MPI_SUCCESS) { handle_error(errcode, "MPI_File_open"); } errcode = MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL); if (errcode != MPI_SUCCESS) { handle_error(errcode, "MPI_File_set_view"); } for (i = 0; i < NR_NBOPS; i++) { errcode = MPI_File_iread_at(fh, nints / NR_NBOPS * i, buf + (nints / NR_NBOPS * i), nints / NR_NBOPS, MPI_INT, &(request[i])); if (errcode != MPI_SUCCESS) { handle_error(errcode, "MPI_File_open"); } } MPI_Waitall(NR_NBOPS, request, status); MPI_File_close(&fh); /* check if the data read is correct */ for (i = 0; i < nints; i++) { if (buf[i] != (rank * 100000 + i)) { errs++; fprintf(stderr, "Process %d: error, read %d, should be %d\n", rank, buf[i], rank * 100000 + i); } } MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); if (rank == 0) { if (toterrs > 0) { fprintf(stderr, "Found %d errors\n", toterrs); } else { fprintf(stdout, " No Errors\n"); } } free(buf); free(filename); free(tmp); MPI_Finalize(); return 0; }
/* Sets error_code to MPI_SUCCESS if successful, or creates an error code * in the case of error. */ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, ADIO_Offset off, int size, int *count, int *start_pos, int *partial_recv, int *sent_to_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIOI_Access *others_req, int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int *hole, int iter, MPI_Aint buftype_extent, int *buf_idx, int *error_code) { int i, j, k, *tmp_len, nprocs_recv, nprocs_send, err; char **send_buf = NULL; MPI_Request *requests, *send_req; MPI_Datatype *recv_types; MPI_Status *statuses, status; int *srt_len=NULL, sum; ADIO_Offset *srt_off=NULL; static char myname[] = "ADIOI_W_EXCHANGE_DATA"; /* exchange recv_size info so that each process knows how much to send to whom. */ MPI_Alltoall(recv_size, 1, MPI_INT, send_size, 1, MPI_INT, fd->comm); /* create derived datatypes for recv */ nprocs_recv = 0; for (i=0; i<nprocs; i++) if (recv_size[i]) nprocs_recv++; recv_types = (MPI_Datatype *) ADIOI_Malloc((nprocs_recv+1)*sizeof(MPI_Datatype)); /* +1 to avoid a 0-size malloc */ tmp_len = (int *) ADIOI_Malloc(nprocs*sizeof(int)); j = 0; for (i=0; i<nprocs; i++) { if (recv_size[i]) { /* take care if the last off-len pair is a partial recv */ if (partial_recv[i]) { k = start_pos[i] + count[i] - 1; tmp_len[i] = others_req[i].lens[k]; others_req[i].lens[k] = partial_recv[i]; } ADIOI_Type_create_hindexed_x(count[i], &(others_req[i].lens[start_pos[i]]), &(others_req[i].mem_ptrs[start_pos[i]]), MPI_BYTE, recv_types+j); /* absolute displacements; use MPI_BOTTOM in recv */ MPI_Type_commit(recv_types+j); j++; } } /* To avoid a read-modify-write, check if there are holes in the data to be written. For this, merge the (sorted) offset lists others_req using a heap-merge. */ sum = 0; for (i=0; i<nprocs; i++) sum += count[i]; /* valgrind-detcted optimization: if there is no work on this process we do * not need to search for holes */ if (sum) { srt_off = (ADIO_Offset *) ADIOI_Malloc(sum*sizeof(ADIO_Offset)); srt_len = (int *) ADIOI_Malloc(sum*sizeof(int)); ADIOI_Heap_merge(others_req, count, srt_off, srt_len, start_pos, nprocs, nprocs_recv, sum); } /* for partial recvs, restore original lengths */ for (i=0; i<nprocs; i++) if (partial_recv[i]) { k = start_pos[i] + count[i] - 1; others_req[i].lens[k] = tmp_len[i]; } ADIOI_Free(tmp_len); /* check if there are any holes. If yes, must do read-modify-write. * holes can be in three places. 'middle' is what you'd expect: the * processes are operating on noncontigous data. But holes can also show * up at the beginning or end of the file domain (see John Bent ROMIO REQ * #835). Missing these holes would result in us writing more data than * recieved by everyone else. 
*/ *hole = 0; if (sum) { if (off != srt_off[0]) /* hole at the front */ *hole = 1; else { /* coalesce the sorted offset-length pairs */ for (i=1; i<sum; i++) { if (srt_off[i] <= srt_off[0] + srt_len[0]) { /* ok to cast: operating on cb_buffer_size chunks */ int new_len = (int)srt_off[i] + srt_len[i] - (int)srt_off[0]; if (new_len > srt_len[0]) srt_len[0] = new_len; } else break; } if (i < sum || size != srt_len[0]) /* hole in middle or end */ *hole = 1; } ADIOI_Free(srt_off); ADIOI_Free(srt_len); } if (nprocs_recv) { if (*hole) { ADIO_ReadContig(fd, write_buf, size, MPI_BYTE, ADIO_EXPLICIT_OFFSET, off, &status, &err); /* --BEGIN ERROR HANDLING-- */ if (err != MPI_SUCCESS) { *error_code = MPIO_Err_create_code(err, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**ioRMWrdwr", 0); return; } /* --END ERROR HANDLING-- */ } } nprocs_send = 0; for (i=0; i < nprocs; i++) if (send_size[i]) nprocs_send++; if (fd->atomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ requests = (MPI_Request *) ADIOI_Malloc((nprocs_send+1)*sizeof(MPI_Request)); send_req = requests; } else { requests = (MPI_Request *) ADIOI_Malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); /* +1 to avoid a 0-size malloc */ /* post receives */ j = 0; for (i=0; i<nprocs; i++) { if (recv_size[i]) { MPI_Irecv(MPI_BOTTOM, 1, recv_types[j], i, myrank+i+100*iter, fd->comm, requests+j); j++; } } send_req = requests + nprocs_recv; } /* post sends. if buftype_is_contig, data can be directly sent from user buf at location given by buf_idx. else use send_buf. */ #ifdef AGGREGATION_PROFILE MPE_Log_event (5032, 0, NULL); #endif if (buftype_is_contig) { j = 0; for (i=0; i < nprocs; i++) if (send_size[i]) { MPI_Isend(((char *) buf) + buf_idx[i], send_size[i], MPI_BYTE, i, myrank+i+100*iter, fd->comm, send_req+j); j++; buf_idx[i] += send_size[i]; } } else if (nprocs_send) { /* buftype is not contig */ send_buf = (char **) ADIOI_Malloc(nprocs*sizeof(char*)); for (i=0; i < nprocs; i++) if (send_size[i]) send_buf[i] = (char *) ADIOI_Malloc(send_size[i]); ADIOI_Fill_send_buffer(fd, buf, flat_buf, send_buf, offset_list, len_list, send_size, send_req, sent_to_proc, nprocs, myrank, contig_access_count, min_st_offset, fd_size, fd_start, fd_end, send_buf_idx, curr_to_proc, done_to_proc, iter, buftype_extent); /* the send is done in ADIOI_Fill_send_buffer */ } if (fd->atomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ j = 0; for (i=0; i<nprocs; i++) { MPI_Status wkl_status; if (recv_size[i]) { MPI_Recv(MPI_BOTTOM, 1, recv_types[j], i, myrank+i+100*iter, fd->comm, &wkl_status); j++; } } } for (i=0; i<nprocs_recv; i++) MPI_Type_free(recv_types+i); ADIOI_Free(recv_types); if (fd->atomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+1) * \ sizeof(MPI_Status)); /* +1 to avoid a 0-size malloc */ } else { statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+nprocs_recv+1) * \ sizeof(MPI_Status)); /* +1 to avoid a 0-size malloc */ } #ifdef NEEDS_MPI_TEST i = 0; if (fd->atomicity) { /* bug fix from Wei-keng Liao and Kenin Coloma */ while (!i) MPI_Testall(nprocs_send, send_req, &i, statuses); } else { while (!i) MPI_Testall(nprocs_send+nprocs_recv, requests, &i, statuses); } #else if (fd->atomicity) /* bug fix from Wei-keng Liao and Kenin Coloma */ MPI_Waitall(nprocs_send, send_req, statuses); else MPI_Waitall(nprocs_send+nprocs_recv, requests, statuses); #endif #ifdef AGGREGATION_PROFILE MPE_Log_event (5033, 0, NULL); #endif ADIOI_Free(statuses); ADIOI_Free(requests); if 
(!buftype_is_contig && nprocs_send) { for (i=0; i < nprocs; i++) if (send_size[i]) ADIOI_Free(send_buf[i]); ADIOI_Free(send_buf); } }
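/* Toy, MPI-free illustration of the hole check described in the comments above: coalesce sorted (offset, length) pairs starting from srt_off[0] and flag a hole when the coalesced run does not start at off or does not cover size bytes. The sample data is arbitrary and covers [0,100) with no gaps. */
#include <stdio.h>

int main(void)
{
    long long off = 0, size = 100;
    long long srt_off[] = {0, 40, 70};        /* sorted starting offsets */
    int       srt_len[] = {40, 30, 30};       /* matching lengths        */
    int sum = 3, hole = 0;
    int i;

    if (off != srt_off[0]) {
        hole = 1;                             /* hole at the front */
    } else {
        for (i = 1; i < sum; i++) {
            if (srt_off[i] <= srt_off[0] + srt_len[0]) {
                /* extent touches or overlaps the coalesced run: extend it */
                int new_len = (int)(srt_off[i] + srt_len[i] - srt_off[0]);
                if (new_len > srt_len[0]) srt_len[0] = new_len;
            } else {
                break;                        /* gap found: hole in the middle */
            }
        }
        if (i < sum || size != srt_len[0]) hole = 1;  /* middle or trailing hole */
    }
    printf("hole = %d\n", hole);              /* prints 0 for this data */
    return 0;
}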
int main(int argc, char *argv[]) { MPI_Request r; MPI_Status s; // int flag; int buf[10]; int rbuf[10]; int tag = 27; int dest = 0; int rank, size; MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &size ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); /* Create a persistent send request */ // everyone prepares the send to 0 MPI_Send_init( buf, 10, MPI_INT, dest, tag, MPI_COMM_WORLD, &r ); /* Use that request */ if (rank == 0) { // allocate an array of size requests for the irecvs MPI_Request *rr = (MPI_Request *)malloc(size * sizeof(MPI_Request)); for (int i=0; i<size; i++) { // 0 will receive from everyone MPI_Irecv( rbuf, 10, MPI_INT, i, tag, MPI_COMM_WORLD, &rr[i] ); } // 0 will send to 0 MPI_Start( &r ); // 0 sends to 0 MPI_Wait( &r, &s ); // 0 receives from everyone MPI_Waitall( size, rr, MPI_STATUSES_IGNORE ); free(rr); } else { // non-0 will send to 0 MPI_Start( &r ); // non-0 sends to 0 MPI_Wait( &r, &s ); } MPI_Request_free( &r ); // if (rank == 0) // { // MPI_Request sr; // /* Create a persistent receive request */ // // 0 prepares the receive from everyone // MPI_Recv_init( rbuf, 10, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &r ); // // 0 will send to 0 // MPI_Isend( buf, 10, MPI_INT, 0, tag, MPI_COMM_WORLD, &sr ); // for (int i=0; i<size; i++) { // // 0 will receive from everyone // MPI_Start( &r ); // // 0 receives from everyone // MPI_Wait( &r, &s ); // } // // 0 sends to 0 // MPI_Wait( &sr, &s ); // MPI_Request_free( &r ); // } // else { // // non-0 sends to 0 // MPI_Send( buf, 10, MPI_INT, 0, tag, MPI_COMM_WORLD ); // } MPI_Finalize(); return 0; }
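/* Sketch of the persistent-request pattern the example above exercises, assuming a periodic ring of ranks: create the send/recv requests once, reuse them with MPI_Startall/MPI_Waitall inside the loop, and free them afterwards. */
#include <mpi.h>

int main(int argc, char **argv)
{
    int rank, size, sbuf = 0, rbuf = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int right = (rank + 1) % size;
    int left  = (rank - 1 + size) % size;

    /* Build the persistent requests once. */
    MPI_Request req[2];
    MPI_Send_init(&sbuf, 1, MPI_INT, right, 7, MPI_COMM_WORLD, &req[0]);
    MPI_Recv_init(&rbuf, 1, MPI_INT, left,  7, MPI_COMM_WORLD, &req[1]);

    for (int iter = 0; iter < 10; iter++) {
        sbuf = rank + iter;               /* refresh the send buffer */
        MPI_Startall(2, req);             /* reuse both requests */
        MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
    }

    MPI_Request_free(&req[0]);
    MPI_Request_free(&req[1]);
    MPI_Finalize();
    return 0;
}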
static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node *flat_buf, ADIO_Offset *offset_list, ADIO_Offset *len_list, int *send_size, int *recv_size, int *count, int *start_pos, int *partial_send, int *recd_from_proc, int nprocs, int myrank, int buftype_is_contig, int contig_access_count, ADIO_Offset min_st_offset, ADIO_Offset fd_size, ADIO_Offset *fd_start, ADIO_Offset *fd_end, ADIOI_Access *others_req, int iter, MPI_Aint buftype_extent, int *buf_idx) { int i, j, k=0, tmp=0, nprocs_recv, nprocs_send; char **recv_buf = NULL; MPI_Request *requests; MPI_Datatype send_type; MPI_Status *statuses; /* exchange send_size info so that each process knows how much to receive from whom and how much memory to allocate. */ MPI_Alltoall(send_size, 1, MPI_INT, recv_size, 1, MPI_INT, fd->comm); nprocs_recv = 0; for (i=0; i < nprocs; i++) if (recv_size[i]) nprocs_recv++; nprocs_send = 0; for (i=0; i<nprocs; i++) if (send_size[i]) nprocs_send++; requests = (MPI_Request *) ADIOI_Malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); /* +1 to avoid a 0-size malloc */ /* post recvs. if buftype_is_contig, data can be directly recd. into user buf at location given by buf_idx. else use recv_buf. */ #ifdef AGGREGATION_PROFILE MPE_Log_event (5032, 0, NULL); #endif if (buftype_is_contig) { j = 0; for (i=0; i < nprocs; i++) if (recv_size[i]) { MPI_Irecv(((char *) buf) + buf_idx[i], recv_size[i], MPI_BYTE, i, myrank+i+100*iter, fd->comm, requests+j); j++; buf_idx[i] += recv_size[i]; } } else { /* allocate memory for recv_buf and post receives */ recv_buf = (char **) ADIOI_Malloc(nprocs * sizeof(char*)); for (i=0; i < nprocs; i++) if (recv_size[i]) recv_buf[i] = (char *) ADIOI_Malloc(recv_size[i]); j = 0; for (i=0; i < nprocs; i++) if (recv_size[i]) { MPI_Irecv(recv_buf[i], recv_size[i], MPI_BYTE, i, myrank+i+100*iter, fd->comm, requests+j); j++; #ifdef RDCOLL_DEBUG DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", myrank, recv_size[i], myrank+i+100*iter); #endif } } /* create derived datatypes and send data */ j = 0; for (i=0; i<nprocs; i++) { if (send_size[i]) { /* take care if the last off-len pair is a partial send */ if (partial_send[i]) { k = start_pos[i] + count[i] - 1; tmp = others_req[i].lens[k]; others_req[i].lens[k] = partial_send[i]; } ADIOI_Type_create_hindexed_x(count[i], &(others_req[i].lens[start_pos[i]]), &(others_req[i].mem_ptrs[start_pos[i]]), MPI_BYTE, &send_type); /* absolute displacement; use MPI_BOTTOM in send */ MPI_Type_commit(&send_type); MPI_Isend(MPI_BOTTOM, 1, send_type, i, myrank+i+100*iter, fd->comm, requests+nprocs_recv+j); MPI_Type_free(&send_type); if (partial_send[i]) others_req[i].lens[k] = tmp; j++; } } statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+nprocs_recv+1) * \ sizeof(MPI_Status)); /* +1 to avoid a 0-size malloc */ /* wait on the receives */ if (nprocs_recv) { #ifdef NEEDS_MPI_TEST j = 0; while (!j) MPI_Testall(nprocs_recv, requests, &j, statuses); #else MPI_Waitall(nprocs_recv, requests, statuses); #endif /* if noncontiguous, to the copies from the recv buffers */ if (!buftype_is_contig) ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf, offset_list, len_list, (unsigned*)recv_size, requests, statuses, recd_from_proc, nprocs, contig_access_count, min_st_offset, fd_size, fd_start, fd_end, buftype_extent); } /* wait on the sends*/ MPI_Waitall(nprocs_send, requests+nprocs_recv, statuses+nprocs_recv); ADIOI_Free(statuses); ADIOI_Free(requests); if (!buftype_is_contig) { for (i=0; i < nprocs; i++) if (recv_size[i]) ADIOI_Free(recv_buf[i]); 
ADIOI_Free(recv_buf); } #ifdef AGGREGATION_PROFILE MPE_Log_event (5033, 0, NULL); #endif }
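/* A small sketch of the absolute-displacement trick used in both exchange routines above: describe scattered memory locations with an hindexed datatype built from MPI_Get_address and receive straight into them with MPI_BOTTOM as the buffer. Two ranks only; the scatter pattern (every other slot of data[]) is arbitrary. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 2) { MPI_Finalize(); return 0; }

    int data[8] = {0};
    if (rank == 0) {
        for (int i = 0; i < 8; i++) data[i] = i + 1;
        MPI_Send(data, 4, MPI_INT, 1, 0, MPI_COMM_WORLD);   /* send 4 ints */
    } else if (rank == 1) {
        /* Scatter the 4 incoming ints into slots 0, 2, 4, 6 of data[]. */
        int blocklens[4] = {1, 1, 1, 1};
        MPI_Aint displs[4];
        for (int i = 0; i < 4; i++) {
            MPI_Get_address(&data[2 * i], &displs[i]);       /* absolute address */
        }
        MPI_Datatype scatter_t;
        MPI_Type_create_hindexed(4, blocklens, displs, MPI_INT, &scatter_t);
        MPI_Type_commit(&scatter_t);
        MPI_Recv(MPI_BOTTOM, 1, scatter_t, 0, 0, MPI_COMM_WORLD,
                 MPI_STATUS_IGNORE);
        MPI_Type_free(&scatter_t);
        for (int i = 0; i < 8; i++) printf("%d ", data[i]);
        printf("\n");
    }
    MPI_Finalize();
    return 0;
}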
int main(int argc, char *argv[]) { int myid, numprocs, i, j; int size, align_size; char *s_buf, *r_buf; double t_start = 0.0, t_end = 0.0, t = 0.0; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); MPI_Comm_rank(MPI_COMM_WORLD, &myid); align_size = getpagesize(); s_buf = (char *) (((unsigned long) s_buf1 + (align_size - 1)) / align_size * align_size); r_buf = (char *) (((unsigned long) r_buf1 + (align_size - 1)) / align_size * align_size); if (myid == 0) { fprintf(stdout, "# OSU MPI Bidirectional Bandwidth Test (Version 2.0)\n"); fprintf(stdout, "# Size\t\tBi-Bandwidth (MB/s) \n"); } for (size = 1; size <= MAX_MSG_SIZE; size *= 2) { /* touch the data */ for (i = 0; i < size; i++) { s_buf[i] = 'a'; r_buf[i] = 'b'; } if (size > large_message_size) { loop = loop_large; skip = skip_large; window_size = window_size_large; } if (myid == 0) { for (i = 0; i < loop + skip; i++) { if (i == skip) t_start = MPI_Wtime(); for (j = 0; j < window_size; j++) MPI_Irecv(r_buf, size, MPI_CHAR, 1, 10, MPI_COMM_WORLD, recv_request + j); for (j = 0; j < window_size; j++) MPI_Isend(s_buf, size, MPI_CHAR, 1, 100, MPI_COMM_WORLD, send_request + j); MPI_Waitall(window_size, send_request, reqstat); MPI_Waitall(window_size, recv_request, reqstat); } t_end = MPI_Wtime(); t = t_end - t_start; } else if (myid == 1) { for (i = 0; i < loop + skip; i++) { for (j = 0; j < window_size; j++) MPI_Irecv(r_buf, size, MPI_CHAR, 0, 100, MPI_COMM_WORLD, recv_request + j); for (j = 0; j < window_size; j++) MPI_Isend(s_buf, size, MPI_CHAR, 0, 10, MPI_COMM_WORLD, send_request + j); MPI_Waitall(window_size, send_request, reqstat); MPI_Waitall(window_size, recv_request, reqstat); } } if (myid == 0) { double tmp; tmp = ((size * 1.0) / 1.0e6) * loop * window_size * 2; fprintf(stdout, "%d\t\t%f\n", size, tmp / t); } } MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int size=-1,rank=-1, left=-1, right=-1, you=-1; int ndata=127,ndata_max=127,seed; int rv, nsec=0, count, cmpl; long long int i,j,k; unsigned long long int nflop=0,nmem=1,nsleep=0,nrep=1, myflops; char *env_ptr, cbuf[4096]; double *sbuf, *rbuf,*x; MPI_Status *s; MPI_Request *r; time_t ts; seed = time(&ts); flags |= DOMPI; while(--argc && argv++) { if(!strcmp("-v",*argv)) { flags |= DOVERBOSE; } else if(!strcmp("-n",*argv)) { --argc; argv++; nflop = atol(*argv); } else if(!strcmp("-N",*argv)) { --argc; argv++; nrep = atol(*argv); } else if(!strcmp("-d",*argv)) { --argc; argv++; ndata_max = ndata = atol(*argv); } else if(!strcmp("-m",*argv)) { --argc; argv++; nmem = atol(*argv); } else if(!strcmp("-w",*argv)) { --argc; argv++; nsec = atoi(*argv); } else if(!strcmp("-s",*argv)) { --argc; argv++; nsleep = atol(*argv); } else if(!strcmp("-spray",*argv)) { flags |= DOSPRAY; } else if(!strcmp("-c",*argv)) { flags |= CORE; } else if(!strcmp("-r",*argv)) { flags |= REGION; } else if(!strcmp("-stair",*argv)) { flags |= STAIR_RANK; } else if(!strcmp("-stair_region",*argv)) { flags |= STAIR_REGION; } else if(!strcmp("-nompi",*argv)) { flags &= ~DOMPI; } } if(flags & DOMPI) { MPI_Init(&argc,&argv); /* test double init MPI_Init(&argc,&argv); */ MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); } if(nsec > 0) { sleep(nsec); } if(nmem) { nmem = (nmem*1024*1024/sizeof(double)); x = (double *)malloc((size_t)(nmem*sizeof(double))); for(j=0;j<nrep;j++) { for(i=0;i<nmem;i++) { x[i] = i; } for(i=0;i<nmem;i++) { x[i] = i*x[i]; } if(x[nmem-1]*x[nmem-1] < 0) { printf("assumption about flop-test or optimization failed\n"); } } if(0) free((char *)x); } /* #define LONG_REGNAME rshouldbethelastchar */ #define LONG_REGNAME abcdefghijklmnopqrst if(flags & REGION) { MPI_Pcontrol(0,"enter_region(abcdefghijklmnopqrst)"); sprintf(cbuf,""); MPI_Pcontrol(0,"get_region()",cbuf); if(strcmp(cbuf,"abcdefghijklmnopqrst")) { printf("%d in region = \"%s\" not \"%s\"\n", rank,cbuf,"abcdefghijklmnopqrst"); fflush(stdout); } MPI_Pcontrol(0,"exit_region(abcdefghijklmnopqrst)"); MPI_Pcontrol(0,"get_region()",cbuf); if(strcmp(cbuf,"ipm_noregion")) { printf("%d out region = \"%s\" not \"%s\"\n", rank,cbuf,"ipm_noregion"); fflush(stdout); } } if(flags & REGION && rank > -1 ) MPI_Pcontrol(1,"region_zzzzzzzzzzzZz"); if(nflop) { x = (double *)malloc((size_t)(10*sizeof(double))); j = k = 0; for(i=0;i<10;i++) { x[i] = 1.0; } if(flags & STAIR_RANK) { myflops = (rank*nflop)/size; } else { myflops = nflop; } for(i=0;i<nflop;i++) { x[j] = x[j]*x[k]; j = ((i%9)?(j+1):(0)); k = ((i%8)?(k+1):(0)); } free((char *)x); } if(nsleep) { sleep(nsleep); } if(flags & REGION && rank > -1 ) MPI_Pcontrol(-1,"region_zzzzzzzzzzzZz"); if(nmem<nflop) nmem=nflop; if(nflop>1) printf("FLOPS = %lld BYTES = %lld\n", nflop, nmem); fflush(stdout); if(flags & CORE) { for(i=0;;i++) { x[i] = x[i*i-1000]; } } if(flags & DOMPI) { s = (MPI_Status *)malloc((size_t)(sizeof(MPI_Status)*2*size)); r = (MPI_Request *)malloc((size_t)(sizeof(MPI_Request)*2*size)); sbuf = (double *)malloc((size_t)(ndata_max*sizeof(double))); rbuf = (double *)malloc((size_t)(ndata_max*sizeof(double))); for(i=0;i<ndata_max;i++) { sbuf[i] = rbuf[i] = i; } MPI_Bcast(&seed,1,MPI_INT,0,MPI_COMM_WORLD); srand48(seed); for(i=0;i<nrep;i++) { MPI_Bcast(sbuf,ndata_max,MPI_DOUBLE,0,MPI_COMM_WORLD); } if(size>1) { if(!rank) {left=size-1;} else { left = rank-1;} if(rank == size-1) { right=0;} else {right=rank+1;} you = (rank < 
size/2)?(rank+size/2):(rank-size/2); } else { you = left = right = rank; } for(i=0;i<nrep;i++) { if(flags & DOSPRAY) { ndata = (long int)(drand48()*ndata_max)+1; } MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,right,1,rbuf,ndata,MPI_DOUBLE,left,1,MPI_COMM_WORLD,s); MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s); if(flags & REGION) MPI_Pcontrol(1,"region_a"); MPI_Barrier(MPI_COMM_WORLD); MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s); MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,1,MPI_COMM_WORLD,s); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Isend(sbuf,ndata/2,MPI_DOUBLE,you,0,MPI_COMM_WORLD, r); MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &cmpl, s); MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, s); MPI_Get_count(s,MPI_DOUBLE,&count); MPI_Recv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD, s); if(count != ndata/2) { printf("error: MPI_Get_count(s,MPI_DOUBLE,&count) --> count = %d\n",count); } MPI_Wait(r,s); /* FIXME - the following case may need to be addressed MPI_Test(r,&cmpl,s); printf("spam1 %d %d\n", s->MPI_SOURCE, cmpl); if(r != MPI_REQUEST_NULL) { MPI_Wait(r,s); printf("spam2 %d\n", s->MPI_SOURCE); } */ MPI_Irecv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD,r); MPI_Send(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD); MPI_Wait(r,s); for(j=0;j<size;j++) { MPI_Isend(sbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD, r+j); MPI_Irecv(rbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD,r+size+j); } MPI_Waitall(2*size,r,s); /* for(j=0;j<size;j++) { printf("rep %d stat %d %d %d\n",i, j, s[j].MPI_SOURCE, s[j+size].MPI_SOURCE); } */ if(flags & REGION) MPI_Pcontrol(-1,"region_a"); if(flags & REGION) MPI_Pcontrol(1,"region_b"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_b"); if(1) { if(flags & REGION) MPI_Pcontrol(1,"region_c"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_c"); if(flags & REGION) MPI_Pcontrol(1,"region_d"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_d"); if(flags & REGION) MPI_Pcontrol(1,"region_e"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_e"); if(flags & REGION) MPI_Pcontrol(1,"region_f"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_f"); if(flags & REGION) MPI_Pcontrol(1,"region_g"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & 
REGION) MPI_Pcontrol(-1,"region_g"); if(flags & REGION) MPI_Pcontrol(1,"region_h"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_h"); if(flags & REGION) MPI_Pcontrol(1,"region_i"); MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(flags & REGION) MPI_Pcontrol(-1,"region_i"); } } MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); } free((char *)rbuf); free((char *)sbuf); free((char *)r); free((char *)s); free((char *)x); return 0; }
int main(int argc, char **argv) { int my_rank; float * p1; float * p2; float * ptemp; int i, j; MPI_Status stats[2*NUM_SLICES+2]; MPI_Request requests[2*NUM_SLICES+2]; int ierr; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /*DETERMINE RANK OF THIS PROCESSOR*/ printf("Hello Paul %d\n", my_rank); if (my_rank == 0) { init_signal(un); p1 = un; for (j = 0; j < SLICE_WIDTH; j++) { local_data[j] = *(un+j); } // send starting data to other processes for (i= 1; i < NUM_SLICES; i++) { MPI_Isend(un + (SLICE_WIDTH-2* OVERLAP) * i, SLICE_WIDTH, MPI_FLOAT, i, 0, MPI_COMM_WORLD, &requests[i]); } MPI_Waitall(NUM_SLICES-1, &requests[1], &stats[1]); for (j=0; j< 8192; j++) { //printf("process 0 step %d\n", j); // update from A to B for (i=0; i < OVERLAP/2; i++) { update_signal(local_data, local_data_1, 0, SLICE_WIDTH); } // update from B to A for (i=0; i < OVERLAP/2; i++) { update_signal(local_data_1, local_data, 0, SLICE_WIDTH); } // send/receive overlaps MPI_Isend(local_data + (SLICE_WIDTH- 2 * OVERLAP), OVERLAP, MPI_FLOAT, 1, 0, MPI_COMM_WORLD, &requests[0]); MPI_Irecv(local_data + (SLICE_WIDTH - OVERLAP), OVERLAP, MPI_FLOAT, 1, 1, MPI_COMM_WORLD, &requests[1]); MPI_Waitall(2, &requests[0], &stats[0]); } for (i= 1; i < NUM_SLICES; i++) { MPI_Irecv(un_1 + (SLICE_WIDTH-2* OVERLAP) * i, SLICE_WIDTH, MPI_FLOAT, i, 0, MPI_COMM_WORLD, &requests[i]); } MPI_Waitall(NUM_SLICES-1, &requests[1], &stats[1]); for (j = 0; j< SLICE_WIDTH; j++) { *(un_1 +j) = local_data[j]; } display_signal(un_1); } else { MPI_Irecv(local_data, SLICE_WIDTH, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &requests[0]); MPI_Wait(&requests[0], &stats[0]); for (j=0; j< 8192; j++) { //printf("process step %d\n", j); for (i=0; i < OVERLAP/2; i++) { update_signal(local_data, local_data_1, 0, SLICE_WIDTH); } for (i=0; i < OVERLAP/2; i++) { update_signal(local_data_1, local_data, 0, SLICE_WIDTH); } if (my_rank == (NUM_SLICES-1)) { MPI_Isend(local_data + OVERLAP, OVERLAP, MPI_FLOAT, my_rank-1, 2*my_rank -1, MPI_COMM_WORLD, &requests[0]); MPI_Irecv(local_data, OVERLAP, MPI_FLOAT, my_rank-1, 2* my_rank-2, MPI_COMM_WORLD, &requests[1]); MPI_Waitall(2, &requests[0], &stats[0]); } else { MPI_Isend(local_data + OVERLAP, OVERLAP, MPI_FLOAT, my_rank-1, 2*my_rank-1, MPI_COMM_WORLD, &requests[0]); MPI_Irecv(local_data, OVERLAP, MPI_FLOAT, my_rank-1, 2*my_rank-2, MPI_COMM_WORLD, &requests[1]); MPI_Irecv(local_data + SLICE_WIDTH - OVERLAP, OVERLAP, MPI_FLOAT, my_rank+1, 2*my_rank+1, MPI_COMM_WORLD, &requests[2]); MPI_Isend(local_data + SLICE_WIDTH - 2 *OVERLAP, OVERLAP, MPI_FLOAT, my_rank+1, 2*my_rank, MPI_COMM_WORLD, &requests[3]); MPI_Waitall(4, &requests[0], &stats[0]); } } MPI_Isend(local_data, SLICE_WIDTH, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &requests[0]); MPI_Wait(&requests[0], &stats[0]); } char processor_name[128]; int name_len; MPI_Get_processor_name(processor_name, &name_len); // Print off a hello world message printf("Hello world from processor %s\n", processor_name); MPI_Finalize(); }
void xchange_lexicfield32(spinor32 * const l) { MPI_Request requests[16]; MPI_Status status[16]; # ifdef PARALLELT int reqcount = 4; # elif defined PARALLELXT int reqcount = 8; # elif defined PARALLELXYT int reqcount = 12; # elif defined PARALLELXYZT int reqcount = 16; # endif #ifdef _KOJAK_INST #pragma pomp inst begin(xchange_lexicfield32) #endif # if (defined BGL && defined XLC) __alignx(16, l); # endif # ifdef TM_USE_MPI /* send the data to the neighbour on the left */ /* recieve the data from the neighbour on the right */ MPI_Isend((void*)l, 1, lfield_time_slice_cont32, g_nb_t_dn, 5081, g_cart_grid, &requests[0]); MPI_Irecv((void*)(l+VOLUME), 1, lfield_time_slice_cont32, g_nb_t_up, 5081, g_cart_grid, &requests[1]); # if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Isend((void*)l, 1, lfield_x_slice_gath32, g_nb_x_dn, 5091, g_cart_grid, &requests[4]); MPI_Irecv((void*)(l+(T+2)*LX*LY*LZ), 1, lfield_x_slice_cont32, g_nb_x_up, 5091, g_cart_grid, &requests[5]); # endif # if (defined PARALLELXYT || defined PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Isend((void*)l, 1, lfield_y_slice_gath32, g_nb_y_dn, 5101, g_cart_grid, &requests[8]); MPI_Irecv((void*)(l + VOLUME + 2*LZ*(LX*LY + T*LY)), 1, lfield_y_slice_cont32, g_nb_y_up, 5101, g_cart_grid, &requests[9]); # endif # if (defined PARALLELXYZT) /* send the data to the neighbour on the left in z direction */ /* recieve the data from the neighbour on the right in z direction */ MPI_Isend((void*)l, 1, lfield_z_slice_gath32, g_nb_z_dn, 5503, g_cart_grid, &requests[12]); MPI_Irecv((void*)(l+VOLUME + 2*LZ*(LX*LY + T*LY) + 2*LZ*T*LX), 1, lfield_z_slice_cont32, g_nb_z_up, 5503, g_cart_grid, &requests[13]); # endif /* send the data to the neighbour on the right */ /* recieve the data from the neighbour on the left */ MPI_Isend((void*)(l+(T-1)*LX*LY*LZ), 1, lfield_time_slice_cont32, g_nb_t_up, 5082, g_cart_grid, &requests[2]); MPI_Irecv((void*)(l+(T+1)*LX*LY*LZ), 1, lfield_time_slice_cont32, g_nb_t_dn, 5082, g_cart_grid, &requests[3]); # if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) /* send the data to the neighbour on the right in x direction */ /* recieve the data from the neighbour on the left in x direction */ MPI_Isend((void*)(l+(LX-1)*LY*LZ), 1, lfield_x_slice_gath32, g_nb_x_up, 5092, g_cart_grid, &requests[6]); MPI_Irecv((void*)(l+((T+2)*LX*LY*LZ + T*LY*LZ)), 1, lfield_x_slice_cont32, g_nb_x_dn, 5092, g_cart_grid, &requests[7]); # endif # if (defined PARALLELXYT || defined PARALLELXYZT) /* send the data to the neighbour on the right in y direction */ /* recieve the data from the neighbour on the left in y direction */ MPI_Isend((void*)(l+(LY-1)*LZ), 1, lfield_y_slice_gath32, g_nb_y_up, 5102, g_cart_grid, &requests[10]); MPI_Irecv((void*)(l+VOLUME + 2*LZ*(LX*LY + T*LY) + T*LX*LZ), 1, lfield_y_slice_cont32, g_nb_y_dn, 5102, g_cart_grid, &requests[11]); # endif # if defined PARALLELXYZT /* send the data to the neighbour on the right in y direction */ /* recieve the data from the neighbour on the left in y direction */ MPI_Isend((void*)(l+LZ-1), 1, lfield_z_slice_gath32, g_nb_z_up, 5504, g_cart_grid, &requests[14]); MPI_Irecv((void*)(l+VOLUME + 2*LZ*(LX*LY + T*LY) + 2*T*LX*LZ + T*LX*LY), 1, lfield_z_slice_cont32, g_nb_z_dn, 5504, g_cart_grid, &requests[15]); # 
endif MPI_Waitall(reqcount, requests, status); # endif return; #ifdef _KOJAK_INST #pragma pomp inst end(xchange_lexicfield32) #endif }
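/* A minimal 1-D halo exchange in the same spirit as the routine above, assuming a periodic Cartesian decomposition: each rank keeps one ghost cell per side and fills it with four nonblocking calls completed by a single MPI_Waitall. N and field are illustrative names, not part of any of the codes above. */
#include <mpi.h>

#define N 16

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Periodic 1-D process grid. */
    int dims[1] = {size}, periods[1] = {1};
    MPI_Comm cart;
    MPI_Cart_create(MPI_COMM_WORLD, 1, dims, periods, 0, &cart);
    int left, right;
    MPI_Cart_shift(cart, 0, 1, &left, &right);

    double field[N + 2];                 /* field[0] and field[N+1] are ghosts */
    for (int i = 1; i <= N; i++) field[i] = rank;

    MPI_Request req[4];
    MPI_Irecv(&field[0],     1, MPI_DOUBLE, left,  0, cart, &req[0]);
    MPI_Irecv(&field[N + 1], 1, MPI_DOUBLE, right, 1, cart, &req[1]);
    MPI_Isend(&field[N],     1, MPI_DOUBLE, right, 0, cart, &req[2]);
    MPI_Isend(&field[1],     1, MPI_DOUBLE, left,  1, cart, &req[3]);
    MPI_Waitall(4, req, MPI_STATUSES_IGNORE);

    MPI_Comm_free(&cart);
    MPI_Finalize();
    return 0;
}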