Example #1
/*************************************************************************
 * This function sets up the communication data structures (send/receive lists) of the distributed graph
**************************************************************************/
void SetUp(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int i, j, k, islocal, penum, gnvtxs, nvtxs, nlocal, firstvtx, lastvtx, nsend, nrecv, nnbrs, nadj;
  int npes=ctrl->npes, mype=ctrl->mype;
  idxtype *vtxdist, *xadj, *adjncy;
  idxtype *peind, *recvptr, *recvind, *sendptr, *sendind;
  idxtype *receive, *pemap, *imap, *lperm;
  idxtype *pexadj, *peadjncy, *peadjloc, *startsind;
  KeyValueType *recvrequests, *sendrequests, *adjpairs;

  IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->SetupTmr));

  gnvtxs  = graph->gnvtxs;
  nvtxs   = graph->nvtxs;
  vtxdist = graph->vtxdist;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;

  firstvtx = vtxdist[mype];
  lastvtx = vtxdist[mype+1];

  pemap = wspace->pv1;
  idxset(npes, -1, pemap);

  lperm = graph->lperm = idxmalloc(nvtxs, "SetUp: graph->lperm");
  for (i=0; i<nvtxs; i++)
    lperm[i] = i;

  /************************************************************* 
   * Determine what you need to receive 
   *************************************************************/
  receive  = wspace->indices;  		/* Use the large global received array for now */
  adjpairs = wspace->pairs;

  for (nlocal = nadj = i = 0; i<nvtxs; i++) {
    islocal = 1;
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (k >= firstvtx && k < lastvtx) {
        adjncy[j] = k-firstvtx;
        continue;  /* local vertex */
      }
      adjpairs[nadj].key = k;
      adjpairs[nadj++].val = j;
      islocal = 0;
    }
    if (islocal) {
      lperm[i] = lperm[nlocal];
      lperm[nlocal++] = i;
    }
  }

  /* Take care of the received part now */
  ikeysort(nadj, adjpairs);
  adjpairs[nadj].key = gnvtxs+1;  /* Boundary condition */
  for (nrecv=i=0; i<nadj; i++) {
    adjncy[adjpairs[i].val] = nvtxs+nrecv;
    if (adjpairs[i].key != adjpairs[i+1].key)
      receive[nrecv++] = adjpairs[i].key;
  }


  /* Allocate space for the setup info attached to this level of the graph */
  peind = graph->peind = idxmalloc(npes, "SetUp: peind");
  recvptr = graph->recvptr = idxmalloc(npes+1, "SetUp: recvptr");
  recvind = graph->recvind = idxmalloc(nrecv, "SetUp: recvind");

  /* Take care of the received portion */
  idxcopy(nrecv, receive, recvind);  /* Copy the vertices to be received into recvind */

  i = nnbrs = recvptr[0] = 0;
  for (penum=0; penum<npes; penum++) {
    for (j=i; j<nrecv; j++) {
      if (recvind[j] >= vtxdist[penum+1])
        break;
    }
    if (j > i) {
      peind[nnbrs] = penum;
      recvptr[++nnbrs] = j;
      i = j;
    }
  }


  /************************************************************* 
   * Determine what you need to send 
   *************************************************************/
  /* Tell the other processors what they need to send you */
  recvrequests = wspace->pepairs1;
  sendrequests = wspace->pepairs2;
  for (i=0; i<npes; i++)
    recvrequests[i].key = 0;
  for (i=0; i<nnbrs; i++) {
    recvrequests[peind[i]].key = recvptr[i+1]-recvptr[i];
    recvrequests[peind[i]].val = nvtxs+recvptr[i];
  }
  MPI_Alltoall((void *)recvrequests, 2, IDX_DATATYPE, (void *)sendrequests, 2, IDX_DATATYPE, ctrl->comm);


  sendptr = graph->sendptr = idxmalloc(npes+1, "SetUp: sendptr");
  startsind = wspace->pv2;
  for (j=i=0; i<npes; i++) {
    if (sendrequests[i].key > 0) {
      sendptr[j] = sendrequests[i].key;
      startsind[j] = sendrequests[i].val;
      j++;
    }
  }
  ASSERT(ctrl, nnbrs == j);
  MAKECSR(i, j, sendptr);

  nsend = sendptr[nnbrs];
  sendind = graph->sendind = idxmalloc(nsend, "SetUp: sendind");


  /* Issue the receives for sendind */
  for (i=0; i<nnbrs; i++) {
    MPI_Irecv((void *)(sendind+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE, 
              peind[i], 1, ctrl->comm, ctrl->rreq+i);
  }

  /* Issue the sends. My recvind[penum] becomes penum's sendind[mype] */
  for (i=0; i<nnbrs; i++) {
    MPI_Isend((void *)(recvind+recvptr[i]), recvptr[i+1]-recvptr[i], IDX_DATATYPE,
              peind[i], 1, ctrl->comm, ctrl->sreq+i);
  }

  MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
  MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);



  /* Create the peadjncy data structure for sparse boundary exchanges */
  pexadj = graph->pexadj = idxsmalloc(nvtxs+1, 0, "SetUp: pexadj");
  peadjncy = graph->peadjncy = idxmalloc(nsend, "SetUp: peadjncy");
  peadjloc = graph->peadjloc = idxmalloc(nsend, "SetUp: peadjloc");

  for (i=0; i<nsend; i++) {
    ASSERTP(ctrl, sendind[i] >= firstvtx && sendind[i] < lastvtx, (ctrl, "%d %d %d\n", sendind[i], firstvtx, lastvtx));
    pexadj[sendind[i]-firstvtx]++;
  }
  MAKECSR(i, nvtxs, pexadj);

  for (i=0; i<nnbrs; i++) {
    for (j=sendptr[i]; j<sendptr[i+1]; j++) {
      k = pexadj[sendind[j]-firstvtx]++;
      peadjncy[k] = i;  /* peind[i] is the actual PE number */
      peadjloc[k] = startsind[i]++;
    }
  }
  ASSERT(ctrl, pexadj[nvtxs] == nsend);

  for (i=nvtxs; i>0; i--)
    pexadj[i] = pexadj[i-1];
  pexadj[0] = 0;


  graph->nnbrs = nnbrs;
  graph->nrecv = nrecv;
  graph->nsend = nsend;
  graph->nlocal = nlocal;


  /* Create the inverse map from ladjncy to adjncy */
  imap = graph->imap = idxmalloc(nvtxs+nrecv, "SetUp: imap");
  for (i=0; i<nvtxs; i++)
    imap[i] = firstvtx+i;
  for (i=0; i<nrecv; i++)
    imap[nvtxs+i] = recvind[i];


  /* Check if wspace->nlarge is large enough for nrecv and nsend */
  if (wspace->nlarge < nrecv+nsend) {
    free(wspace->indices);
    free(wspace->pairs);
    wspace->nlarge = nrecv+nsend;
    wspace->indices = idxmalloc(wspace->nlarge, "SetUp: wspace->indices");
    wspace->pairs = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*wspace->nlarge, "SetUp: wspace->pairs");
  }

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->SetupTmr));

#ifdef DEBUG_SETUPINFO
  rprintf(ctrl, "[%5d %5d] \tl:[%5d %5d] \ts:[%5d, %5d] \tr:[%5d, %5d]\n", 
            GlobalSEMin(ctrl, nvtxs), GlobalSEMax(ctrl, nvtxs),
            GlobalSEMin(ctrl, nlocal), GlobalSEMax(ctrl, nlocal),
            GlobalSEMin(ctrl, nsend), GlobalSEMax(ctrl, nsend),
            GlobalSEMin(ctrl, nrecv), GlobalSEMax(ctrl, nrecv));

  PrintSetUpInfo(ctrl, graph);
#endif
}
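
The core communication step above posts one MPI_Irecv per neighbour for sendind, one MPI_Isend per neighbour for recvind, and then completes both sets with MPI_Waitall. A minimal, self-contained sketch of that pattern (the function name and the neighbour/buffer arrays below are placeholders, not part of ParMETIS):

#include <mpi.h>
#include <stdlib.h>

/* Sketch: exchange one int with each of 'nnbrs' neighbour ranks using the
 * post-receives-first, then-send, then MPI_Waitall pattern used above. */
void exchange_with_neighbours(int nnbrs, int *peind, int *sendbuf,
                              int *recvbuf, MPI_Comm comm)
{
  MPI_Request *rreq = malloc(nnbrs * sizeof(MPI_Request));
  MPI_Request *sreq = malloc(nnbrs * sizeof(MPI_Request));
  int i;

  for (i = 0; i < nnbrs; i++)   /* post all receives first */
    MPI_Irecv(recvbuf + i, 1, MPI_INT, peind[i], 1, comm, rreq + i);
  for (i = 0; i < nnbrs; i++)   /* then the matching sends */
    MPI_Isend(sendbuf + i, 1, MPI_INT, peind[i], 1, comm, sreq + i);

  MPI_Waitall(nnbrs, rreq, MPI_STATUSES_IGNORE);
  MPI_Waitall(nnbrs, sreq, MPI_STATUSES_IGNORE);

  free(rreq);
  free(sreq);
}
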
Example #2
void xchange_deri() {
  int cntr=0;
#ifdef MPI
  MPI_Request request[8];
  MPI_Status status[8];
  int ix,mu, t, y, z, x;

#    if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT )
  /* send the data to the neighbour on the left in time direction */
  /* receive the data from the neighbour on the right in time direction */
  MPI_Isend(&df0[gI_m1_0_0_0][0].d1,    1, deri_time_slice_cont, g_nb_t_dn, 43,
	    g_cart_grid, &request[cntr]);
  
  MPI_Irecv(&ddummy[gI_Lm1_0_0_0][0].d1, 1, deri_time_slice_cont, g_nb_t_up, 43,
	    g_cart_grid, &request[cntr+1]);
  cntr=cntr+2;
#    endif

#    if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || defined PARALLELXY || defined PARALLELXYZ )

  /* send the data to the neighbour on the left in x direction */
  /* receive the data from the neighbour on the right in x direction */
  MPI_Isend(&df0[gI_0_m1_0_0][0],    1, deri_x_slice_cont, g_nb_x_dn, 44,
	    g_cart_grid, &request[cntr]);
  MPI_Irecv(&ddummy[gI_0_Lm1_0_0][0],             1, deri_x_slice_gath, g_nb_x_up, 44,
	    g_cart_grid, &request[cntr+1]);
  cntr=cntr+2;
#    endif
#    if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ )
  /* send the data to the neighbour on the left in y direction */
  /* receive the data from the neighbour on the right in y direction */
  MPI_Isend((void*)df0[gI_0_0_m1_0], 
	    1, deri_y_slice_cont, g_nb_y_dn, 45,
	    g_cart_grid, &request[cntr]);
  MPI_Irecv((void*)ddummy[gI_0_0_Lm1_0],
	    1, deri_y_slice_gath, g_nb_y_up, 45,
	    g_cart_grid, &request[cntr+1]);
  cntr=cntr+2;
#    endif
#    if (defined PARALLELXYZT || defined PARALLELXYZ )
  /* send the data to the neighbour on the left in z direction */
  /* receive the data from the neighbour on the right in z direction */
  MPI_Isend((void*)df0[gI_0_0_0_m1], 
	    1, deri_z_slice_cont, g_nb_z_dn, 46,
	    g_cart_grid, &request[cntr]);
  MPI_Irecv((void*)ddummy[gI_0_0_0_Lm1],
	    1, deri_z_slice_gath, g_nb_z_up, 46,
	    g_cart_grid, &request[cntr+1]);
  cntr=cntr+2;
#    endif
  MPI_Waitall(cntr, request, status);

#    if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT )

  /* add ddummy to df0 */
  for(x = 0; x < LX; x++) {
    for(y = 0; y < LY; y++) {
      for(z = 0; z < LZ; z++) {
	ix = g_ipt[T-1][x][y][z];
	for(mu=0;mu<4;mu++){ 
	  df0[ix][mu].d1 += ddummy[ix][mu].d1;
	  df0[ix][mu].d2 += ddummy[ix][mu].d2;
	  df0[ix][mu].d3 += ddummy[ix][mu].d3;
	  df0[ix][mu].d4 += ddummy[ix][mu].d4;
	  df0[ix][mu].d5 += ddummy[ix][mu].d5;
	  df0[ix][mu].d6 += ddummy[ix][mu].d6;
	  df0[ix][mu].d7 += ddummy[ix][mu].d7;
	  df0[ix][mu].d8 += ddummy[ix][mu].d8;
	}
      }
    }
  }

  /* sending the data to the neighbour on the right is not needed */
#    endif

#    if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || defined PARALLELXY || defined PARALLELXYZ )

  /* add ddummy to df0 */
  for(t = 0; t < T; t++) {
    for(y = 0; y < LY; y++) {
      for(z = 0; z < LZ; z++) {
	ix = g_ipt[t][LX-1][y][z];
	for(mu=0;mu<4;mu++){
	  df0[ix][mu].d1 += ddummy[ix][mu].d1;
	  df0[ix][mu].d2 += ddummy[ix][mu].d2;
	  df0[ix][mu].d3 += ddummy[ix][mu].d3;
	  df0[ix][mu].d4 += ddummy[ix][mu].d4;
	  df0[ix][mu].d5 += ddummy[ix][mu].d5;
	  df0[ix][mu].d6 += ddummy[ix][mu].d6;
	  df0[ix][mu].d7 += ddummy[ix][mu].d7;
	  df0[ix][mu].d8 += ddummy[ix][mu].d8;
	}
      }
    }
  }
  /* sending the data to the neighbour on the right is not needed */

#    endif

#    if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ )

  /* add ddummy to df0 */
  for(t = 0; t < T; t++) {
    for(x = 0; x < LX; x++) {
      for(z = 0; z < LZ; z++) {
	ix = g_ipt[t][x][LY-1][z];
	for(mu=0;mu<4;mu++){
	  df0[ix][mu].d1 += ddummy[ix][mu].d1;
	  df0[ix][mu].d2 += ddummy[ix][mu].d2;
	  df0[ix][mu].d3 += ddummy[ix][mu].d3;
	  df0[ix][mu].d4 += ddummy[ix][mu].d4;
	  df0[ix][mu].d5 += ddummy[ix][mu].d5;
	  df0[ix][mu].d6 += ddummy[ix][mu].d6;
	  df0[ix][mu].d7 += ddummy[ix][mu].d7;
	  df0[ix][mu].d8 += ddummy[ix][mu].d8;
	}
      }
    }
  }
  /* sending the data to the neighbour on the right is not needed */

#    endif

#    if ( defined PARALLELXYZT || defined PARALLELXYZ )
  /* add ddummy to df0 */
  for(t = 0; t < T; t++) {
    for(x = 0; x < LX; x++) {
      for(y = 0; y < LY; y++) {
	ix = g_ipt[t][x][y][LZ-1];
	for(mu=0;mu<4;mu++){
	  df0[ix][mu].d1 += ddummy[ix][mu].d1;
	  df0[ix][mu].d2 += ddummy[ix][mu].d2;
	  df0[ix][mu].d3 += ddummy[ix][mu].d3;
	  df0[ix][mu].d4 += ddummy[ix][mu].d4;
	  df0[ix][mu].d5 += ddummy[ix][mu].d5;
	  df0[ix][mu].d6 += ddummy[ix][mu].d6;
	  df0[ix][mu].d7 += ddummy[ix][mu].d7;
	  df0[ix][mu].d8 += ddummy[ix][mu].d8;
	}
      }
    }
  }
  /* sending the data to the neighbour on the right is not needed */

#    endif
  return;
#  endif /* MPI */
}
Example #3
/* ADIOI_Exchange_file_views - Sends all the aggregators the file
 * views and file view states of the clients.  It fills in the
 * client_file_view_state_arr for the aggregators and the
 * my_mem_view_state for the client.  It also initializes the
 * agg_file_view_state for all clients, which is the view for each
 * aggregator of a client's filetype. */
void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type,
                           ADIO_File fd, int count,
                           MPI_Datatype datatype, ADIO_Offset off,
                           view_state * my_mem_view_state_arr,
                           view_state * agg_file_view_state_arr,
                           view_state * client_file_view_state_arr)
{
    /* Convert my own fileview to an ADIOI_Flattened type and a
     * disp. MPI_Alltoall the count of ADIOI_Flatlist nodes.
     * MPI_Isend/Irecv the block_lens, indices of ADIOI_Flatlist node
     * to/from each of the aggregators with the rest of the file view
     * state. */

    int i = -1, j = -1;
    amount_and_extra_data_t *send_count_arr = NULL;
    amount_and_extra_data_t *recv_count_arr = NULL;
    int send_req_arr_sz = 0;
    int recv_req_arr_sz = 0;
    MPI_Request *send_req_arr = NULL, *recv_req_arr = NULL;
    MPI_Status *statuses = NULL;
    ADIO_Offset disp_off_sz_ext_typesz[6];
    MPI_Aint memtype_extent, filetype_extent;
    int ret = -1;

    /* parameters for datatypes */
    ADIOI_Flatlist_node *flat_mem_p = NULL, *flat_file_p = NULL;
    MPI_Count memtype_sz = -1;
    int memtype_is_contig = -1;
    ADIO_Offset filetype_sz = -1;

#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5014, 0, NULL);
#endif
    /* The memtype will be freed after the call.  The filetype will be
     * freed in the close and should have been flattened in the file
     * view. */
    MPI_Type_size_x(datatype, &memtype_sz);
    MPI_Type_extent(datatype, &memtype_extent);
    if (memtype_sz == memtype_extent) {
        memtype_is_contig = 1;
        flat_mem_p = ADIOI_Flatten_and_find(datatype);
        flat_mem_p->blocklens[0] = memtype_sz * count;
    } else {
        flat_mem_p = ADIOI_Flatten_and_find(datatype);
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(fd->filetype, &filetype_sz);
    flat_file_p = ADIOI_Flatten_and_find(fd->filetype);
    if (filetype_extent == filetype_sz) {
        flat_file_p->blocklens[0] = memtype_sz * count;
        filetype_extent = memtype_sz * count;
        filetype_sz = filetype_extent;
    }

    disp_off_sz_ext_typesz[0] = fd->fp_ind;
    disp_off_sz_ext_typesz[1] = fd->disp;
    disp_off_sz_ext_typesz[2] = off;
    disp_off_sz_ext_typesz[3] = memtype_sz * count;
    disp_off_sz_ext_typesz[4] = (ADIO_Offset) filetype_extent;
    disp_off_sz_ext_typesz[5] = (ADIO_Offset) filetype_sz;

    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
        send_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
    } else {
        send_count_arr = ADIOI_Calloc(fd->hints->cb_nodes, sizeof(amount_and_extra_data_t));

        /* only aggregators receive data */
        if (fd->is_agg) {
            recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
            recv_req_arr = ADIOI_Malloc(nprocs * sizeof(MPI_Request));
            for (i = 0; i < nprocs; i++)
                MPI_Irecv(&recv_count_arr[i], sizeof(amount_and_extra_data_t),
                          MPI_BYTE, i, COUNT_EXCH, fd->comm, &recv_req_arr[i]);
        }

        /* only send data to aggregators */
        send_req_arr = ADIOI_Calloc(fd->hints->cb_nodes, sizeof(MPI_Request));
        for (i = 0; i < fd->hints->cb_nodes; i++) {
            send_count_arr[i].count = flat_file_p->count;
            send_count_arr[i].fp_ind = disp_off_sz_ext_typesz[0];
            send_count_arr[i].disp = disp_off_sz_ext_typesz[1];
            send_count_arr[i].byte_off = disp_off_sz_ext_typesz[2];
            send_count_arr[i].sz = disp_off_sz_ext_typesz[3];
            send_count_arr[i].ext = disp_off_sz_ext_typesz[4];
            send_count_arr[i].type_sz = disp_off_sz_ext_typesz[5];
            MPI_Isend(&send_count_arr[i], sizeof(amount_and_extra_data_t),
                      MPI_BYTE, fd->hints->ranklist[i], COUNT_EXCH, fd->comm, &send_req_arr[i]);
        }
    }


    /* Every client has to build mem and file view_states for each aggregator.
     * We initialize their values here, and we also initialize
     * send_count_arr */

    if (memtype_is_contig) {
        /* if memory is contiguous, we now replace memtype_sz and
         * memtype_extent with the full access size */
        memtype_sz *= count;
        memtype_extent = memtype_sz;
    }

    for (i = 0; i < fd->hints->cb_nodes; i++) {
        int tmp_agg_idx = fd->hints->ranklist[i];
        memset(&(my_mem_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state));
        my_mem_view_state_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3];
        my_mem_view_state_arr[tmp_agg_idx].ext = (ADIO_Offset) memtype_extent;
        my_mem_view_state_arr[tmp_agg_idx].type_sz = (ADIO_Offset) memtype_sz;
        my_mem_view_state_arr[tmp_agg_idx].flat_type_p = flat_mem_p;
        ADIOI_init_view_state(file_ptr_type, 1, &(my_mem_view_state_arr[tmp_agg_idx]), TEMP_OFF);
        ADIOI_init_view_state(file_ptr_type, 1, &(my_mem_view_state_arr[tmp_agg_idx]), REAL_OFF);

        memset(&(agg_file_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state));
        agg_file_view_state_arr[tmp_agg_idx].fp_ind = disp_off_sz_ext_typesz[0];
        agg_file_view_state_arr[tmp_agg_idx].disp = disp_off_sz_ext_typesz[1];
        agg_file_view_state_arr[tmp_agg_idx].byte_off = disp_off_sz_ext_typesz[2];
        agg_file_view_state_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3];
        agg_file_view_state_arr[tmp_agg_idx].ext = disp_off_sz_ext_typesz[4];
        agg_file_view_state_arr[tmp_agg_idx].type_sz = disp_off_sz_ext_typesz[5];
        agg_file_view_state_arr[tmp_agg_idx].flat_type_p = flat_file_p;

        ADIOI_init_view_state(file_ptr_type, 1, &(agg_file_view_state_arr[tmp_agg_idx]), TEMP_OFF);
        ADIOI_init_view_state(file_ptr_type, 1, &(agg_file_view_state_arr[tmp_agg_idx]), REAL_OFF);

        if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
            send_count_arr[tmp_agg_idx].count = flat_file_p->count;
            send_count_arr[tmp_agg_idx].fp_ind = disp_off_sz_ext_typesz[0];
            send_count_arr[tmp_agg_idx].disp = disp_off_sz_ext_typesz[1];
            send_count_arr[tmp_agg_idx].byte_off = disp_off_sz_ext_typesz[2];
            send_count_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3];
            send_count_arr[tmp_agg_idx].ext = disp_off_sz_ext_typesz[4];
            send_count_arr[tmp_agg_idx].type_sz = disp_off_sz_ext_typesz[5];
        }
    }

#ifdef DEBUG2
    fprintf(stderr, "my own flattened memtype: ");
    ADIOI_Print_flatlist_node(flat_mem_p);
    fprintf(stderr, "my own flattened filetype: ");
    ADIOI_Print_flatlist_node(flat_file_p);
#endif

    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        ret = MPI_Alltoall(send_count_arr, sizeof(amount_and_extra_data_t),
                           MPI_BYTE,
                           recv_count_arr, sizeof(amount_and_extra_data_t), MPI_BYTE, fd->comm);
        if (ret != MPI_SUCCESS) {
            fprintf(stderr, "ADIOI_Exchange_file_views: MPI_Alltoall failed " "with error %d", ret);
            return;
        }
    } else {
#ifdef MPI_STATUSES_IGNORE
        statuses = MPI_STATUSES_IGNORE;
#else
        statuses = (MPI_Status *) ADIOI_Malloc(1 + nprocs * sizeof(MPI_Status));
#endif
        if (fd->is_agg) {
            MPI_Waitall(nprocs, recv_req_arr, statuses);
            ADIOI_Free(recv_req_arr);
        }
        MPI_Waitall(fd->hints->cb_nodes, send_req_arr, statuses);
#ifndef MPI_STATUSES_IGNORE
        ADIOI_Free(statuses);
#endif
        ADIOI_Free(send_req_arr);
    }
#ifdef DEBUG2
    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        fprintf(stderr, "send_count_arr:");
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count);
        }
        fprintf(stderr, "\n");
        fprintf(stderr, "recv_count_arr:");
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count);
        }
        fprintf(stderr, "\n");
    } else {
        fprintf(stderr, "send_count_arr:");
        for (i = 0; i < fd->hints->cb_nodes; i++) {
            fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count);
        }
        fprintf(stderr, "\n");
        if (fd->is_agg) {
            fprintf(stderr, "recv_count_arr:");
            for (i = 0; i < nprocs; i++) {
                fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count);
            }
            fprintf(stderr, "\n");
        }
    }
#endif

    if (fd->hints->cb_alltoall == ADIOI_HINT_DISABLE) {
        for (i = 0; i < fd->hints->cb_nodes; i++)
            if (send_count_arr[i].count > 0)
                send_req_arr_sz++;
    }
    /* Figure out how many counts to send/recv */
    for (i = 0; i < nprocs; i++) {
        if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
            if (send_count_arr[i].count > 0)
                send_req_arr_sz++;
        }
        /* Only aggregators should recv */
        if (fd->is_agg) {
            if (recv_count_arr[i].count > 0) {
                if ((client_file_view_state_arr[i].flat_type_p =
                     (ADIOI_Flatlist_node *) ADIOI_Malloc(sizeof(ADIOI_Flatlist_node))) == NULL) {
                    fprintf(stderr, "ADIOI_Exchange_file_views: malloc " "flat_type_p failed\n");
                }
                client_file_view_state_arr[i].flat_type_p->count = recv_count_arr[i].count;
                client_file_view_state_arr[i].flat_type_p->indices =
                    (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, sizeof(ADIO_Offset));
                client_file_view_state_arr[i].flat_type_p->blocklens =
                    (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count, sizeof(ADIO_Offset));

                /* Copy the extra data out of the stuff we Alltoall'd */
                memcpy(&client_file_view_state_arr[i].fp_ind,
                       &recv_count_arr[i].fp_ind, 6 * sizeof(ADIO_Offset));

                recv_req_arr_sz++;
            }
        }
    }

    /* Since ADIOI_Calloc may do other things we add the +1
     * to avoid a 0-size malloc */
    send_req_arr = (MPI_Request *) ADIOI_Calloc(2 * (send_req_arr_sz) + 1, sizeof(MPI_Request));

    j = 0;
    if (recv_req_arr_sz > 0) {
        assert(fd->is_agg);
        recv_req_arr = (MPI_Request *) ADIOI_Calloc(2 * (recv_req_arr_sz), sizeof(MPI_Request));
        for (i = 0; i < nprocs; i++) {
            if (recv_count_arr[i].count > 0) {
                MPI_Irecv(client_file_view_state_arr[i].flat_type_p->indices,
                          recv_count_arr[i].count, ADIO_OFFSET, i,
                          INDICES, fd->comm, &recv_req_arr[j]);
                j++;
                MPI_Irecv(client_file_view_state_arr[i].flat_type_p->blocklens,
                          recv_count_arr[i].count, ADIO_OFFSET, i,
                          BLOCK_LENS, fd->comm, &recv_req_arr[j]);
                j++;
            }
        }
    }

    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
        j = 0;
        for (i = 0; i < nprocs; i++) {
            if (send_count_arr[i].count > 0) {
                MPI_Isend(flat_file_p->indices,
                          send_count_arr[i].count, ADIO_OFFSET, i,
                          INDICES, fd->comm, &send_req_arr[j]);
                j++;
                MPI_Isend(flat_file_p->blocklens,
                          send_count_arr[i].count, ADIO_OFFSET, i,
                          BLOCK_LENS, fd->comm, &send_req_arr[j]);
                j++;
            }
        }
    } else {
        j = 0;
        for (i = 0; i < fd->hints->cb_nodes; i++) {
            if (send_count_arr[i].count > 0) {
                MPI_Isend(flat_file_p->indices,
                          send_count_arr[i].count, ADIO_OFFSET,
                          fd->hints->ranklist[i], INDICES, fd->comm, &send_req_arr[j]);
                j++;
                MPI_Isend(flat_file_p->blocklens,
                          send_count_arr[i].count, ADIO_OFFSET,
                          fd->hints->ranklist[i], BLOCK_LENS, fd->comm, &send_req_arr[j]);
                j++;
            }
        }
    }

    /* Since ADIOI_Malloc may do other things we add the +1
     * to avoid a 0-size malloc */
#ifdef MPI_STATUSES_IGNORE
    statuses = MPI_STATUSES_IGNORE;
#else
    statuses = (MPI_Status *)
        ADIOI_Malloc(1 + 2 * MPL_MAX(send_req_arr_sz, recv_req_arr_sz)
                     * sizeof(MPI_Status));
#endif

    if (send_req_arr_sz > 0) {
        MPI_Waitall(2 * send_req_arr_sz, send_req_arr, statuses);
        ADIOI_Free(send_count_arr);
        ADIOI_Free(send_req_arr);
    }
    if (recv_req_arr_sz > 0) {
        MPI_Waitall(2 * recv_req_arr_sz, recv_req_arr, statuses);
        ADIOI_Free(recv_count_arr);
        ADIOI_Free(recv_req_arr);
    }
#ifndef MPI_STATUSES_IGNORE
    ADIOI_Free(statuses);
#endif

    if (fd->is_agg == 1) {
        ADIOI_init_view_state(file_ptr_type, nprocs, client_file_view_state_arr, TEMP_OFF);
        ADIOI_init_view_state(file_ptr_type, nprocs, client_file_view_state_arr, REAL_OFF);
    }
#ifdef DEBUG
    if (fd->is_agg == 1) {
        ADIOI_Flatlist_node *fr_node_p;
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "client_file_view_state_arr[%d]=(fp_ind=%Ld,"
                    "disp=%Ld,byte_off=%Ld,sz=%Ld,ext=%Ld\n", i,
                    client_file_view_state_arr[i].fp_ind,
                    client_file_view_state_arr[i].disp,
                    client_file_view_state_arr[i].byte_off,
                    client_file_view_state_arr[i].sz, client_file_view_state_arr[i].ext);
        }

        fr_node_p = ADIOI_Flatten_and_find(fd->file_realm_types[fd->my_cb_nodes_index]);
        assert(fr_node_p != NULL);

        fprintf(stderr, "my file realm (idx=%d,st_off=%Ld) ",
                fd->my_cb_nodes_index, fd->file_realm_st_offs[fd->my_cb_nodes_index]);
        ADIOI_Print_flatlist_node(fr_node_p);
    }
#endif

#ifdef DEBUG2
    if (fd->is_agg == 1) {
        for (i = 0; i < nprocs; i++) {
            fprintf(stderr, "client_file_view_state_arr[%d]: ", i);
            ADIOI_Print_flatlist_node(client_file_view_state_arr[i].flat_type_p);
        }
    }
#endif
#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5015, 0, NULL);
#endif
}
Example #4
int main(int argc, char **argv) {
  int myRank, numPes;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numPes);
  MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
  MPI_Request sreq[2], rreq[2];

  int blockDimX, arrayDimX, arrayDimY;

  if (argc != 2 && argc != 3) {
    printf("%s [array_size] \n", argv[0]);
    printf("%s [array_size_X] [array_size_Y] \n", argv[0]);
    MPI_Abort(MPI_COMM_WORLD, -1);
  }

  if(argc == 2) {
    arrayDimY = arrayDimX = atoi(argv[1]);
  }
  else {
    arrayDimX = atoi(argv[1]);
    arrayDimY = atoi(argv[2]);
  }

  if (arrayDimX % numPes != 0) {
    printf("array_size_X %% numPes != 0!\n");
    MPI_Abort(MPI_COMM_WORLD, -1);
  }

  blockDimX = arrayDimX / numPes;

  int iterations = 0, i, j;
  double error = 1.0, max_error = 0.0;

  if(myRank == 0) {
    printf("Running Jacobi on %d processors\n", numPes);
    printf("Array Dimensions: %d %d\n", arrayDimX, arrayDimY);
    printf("Block Dimensions: %d\n", blockDimX);
  }

  double **temperature;
  double **new_temperature;

  /* allocate two dimensional arrays */
  temperature = new double*[blockDimX+2];
  new_temperature = new double*[blockDimX+2];
  for (i=0; i<blockDimX+2; i++) {
    temperature[i] = new double[arrayDimY];
    new_temperature[i] = new double[arrayDimY];
  }
  for(i=0; i<blockDimX+2; i++) {
    for(j=0; j<arrayDimY; j++) {
      temperature[i][j] = 0.5;
      new_temperature[i][j] = 0.5;
    }
  }

  // boundary conditions
  if(myRank < numPes/2) {
    for(i=1; i<=blockDimX; i++)
      temperature[i][0] = 1.0;
  }

  if(myRank == numPes-1) {
    for(j=arrayDimY/2; j<arrayDimY; j++)
      temperature[blockDimX][j] = 0.0;
  }

  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Pcontrol(1);
  startTime = MPI_Wtime();

  while(/*error > 0.001 &&*/ iterations < MAX_ITER) {
    iterations++;

    /* Receive my bottom and top edge */
    MPI_Irecv(&temperature[blockDimX+1][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank+1), BOTTOM, MPI_COMM_WORLD, &rreq[BOTTOM-1]);
    MPI_Irecv(&temperature[0][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank-1), TOP, MPI_COMM_WORLD, &rreq[TOP-1]);

    /* Send my top and bottom edge */
    MPI_Isend(&temperature[1][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank-1), BOTTOM, MPI_COMM_WORLD, &sreq[BOTTOM-1]);
    MPI_Isend(&temperature[blockDimX][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank+1), TOP, MPI_COMM_WORLD, &sreq[TOP-1]);

    MPI_Waitall(2, rreq, MPI_STATUSES_IGNORE);
    MPI_Waitall(2, sreq, MPI_STATUSES_IGNORE);

    for(i=1; i<blockDimX+1; i++) {
      for(j=0; j<arrayDimY; j++) {
        /* update my value based on the surrounding values */
        new_temperature[i][j] = (temperature[i-1][j]+temperature[i+1][j]+temperature[i][wrap_y(j-1)]+temperature[i][wrap_y(j+1)]+temperature[i][j]) * 0.2;
      }
    }

    max_error = error = 0.0;
    for(i=1; i<blockDimX+1; i++) {
      for(j=0; j<arrayDimY; j++) {
	error = fabs(new_temperature[i][j] - temperature[i][j]);
	if(error > max_error)
	  max_error = error;
      }
    }
 
    double **tmp;
    tmp = temperature;
    temperature = new_temperature;
    new_temperature = tmp;

    // boundary conditions
    if(myRank < numPes/2) {
      for(i=1; i<=blockDimX; i++)
	temperature[i][0] = 1.0;
    }

    if(myRank == numPes-1) {
      for(j=arrayDimY/2; j<arrayDimY; j++)
	temperature[blockDimX][j] = 0.0;
    }

    //if(myRank == 0) printf("Iteration %d %f %f %f\n", iterations, max_error, temperature[1][0], temperature[1][1]);

    MPI_Allreduce(&max_error, &error, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
  } /* end of while loop */

  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Pcontrol(0);

  if(myRank == 0) {
    endTime = MPI_Wtime();
    printf("Completed %d iterations\n", iterations);
    printf("Time elapsed: %f\n", endTime - startTime);
  }

  MPI_Finalize();
  return 0;
} /* end function main */
Example #5
void  mpi_waitall(int *count, int *request, int *status, int *ierr)
{
    *ierr = MPI_Waitall(*count, request, (MPI_Status *) status);
    return;
}
Example #6
int main( int argc, char *argv[] )
{
    int errs = 0;
    MPI_Comm comm;
    MPI_Request r[2];
    MPI_Status  s[2];
    int errval, errclass;
    int b1[20], b2[20], rank, size, src, dest, i;

    MTest_Init( &argc, &argv );

    /* Create some receive requests.  tags 0-9 will succeed, tags 10-19 
       will be used for ERR_TRUNCATE (fewer than 20 messages will be used) */
    comm = MPI_COMM_WORLD;

    MPI_Comm_rank( comm, &rank );
    MPI_Comm_size( comm, &size );

    src  = 1;
    dest = 0;
    if (rank == dest) {
	MPI_Errhandler_set( comm, MPI_ERRORS_RETURN );
	errval = MPI_Irecv( b1, 10, MPI_INT, src, 0, comm, &r[0] );
	if (errval) {
	    errs++;
	    MTestPrintError( errval );
	    printf( "Error returned from Irecv\n" );
	}
	errval = MPI_Irecv( b2, 10, MPI_INT, src, 10, comm, &r[1] );
	if (errval) {
	    errs++;
	    MTestPrintError( errval );
	    printf( "Error returned from Irecv\n" );
	}

	errval = MPI_Barrier(comm);
	if (errval) {
	    errs++;
	    MTestPrintError( errval );
	    printf( "Error returned from Barrier\n" );
	}
	for (i=0; i<2; i++) {
	    s[i].MPI_ERROR = -1;
	}
	errval = MPI_Waitall( 2, r, s );
        MPI_Error_class( errval, &errclass );
	if (errclass != MPI_ERR_IN_STATUS) {
	    errs++;
	    printf( "Did not get ERR_IN_STATUS in Waitall\n" );
	}
	else {
	    /* Check for success */
	    /* We allow ERR_PENDING (neither completed nor in error) in case
	       the MPI implementation exits the Waitall when an error 
	       is detected. Thanks to Jim Hoekstra of Iowa State University
	       and Kim McMahon for finding this bug in the test. */
	    for (i=0; i<2; i++) {
		if (s[i].MPI_TAG < 10 && (s[i].MPI_ERROR != MPI_SUCCESS &&
			                  s[i].MPI_ERROR != MPI_ERR_PENDING)) {
		    char msg[MPI_MAX_ERROR_STRING];
		    int msglen = MPI_MAX_ERROR_STRING;
		    errs++;
		    printf( "correct msg had error code %d\n", 
			    s[i].MPI_ERROR );
		    MPI_Error_string( s[i].MPI_ERROR, msg, &msglen );
		    printf( "Error message was %s\n", msg );
		}
		else if (s[i].MPI_TAG >= 10 && s[i].MPI_ERROR == MPI_SUCCESS) {
		    errs++;
		    printf( "truncated msg had MPI_SUCCESS\n" );
		}
	    }
	}

    }
    else if (rank == src) {
	/* Send messages, then barrier so that the wait does not start 
	   until we are sure that the sends have begun */
	MPI_Send( b1, 10, MPI_INT, dest, 0, comm );
	MPI_Send( b2, 11, MPI_INT, dest, 10, comm );
	MPI_Barrier(comm);
    }
    else {
	MPI_Barrier(comm);
    }

    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
  
}
Example #7
int main(int argc, char **argv)
{
    int errs = 0;
    MPI_Status status, *status_array = 0;
    int count = 0, flag, idx, rc, errlen, *indices=0, outcnt;
    MPI_Request *reqs = 0;
    char errmsg[MPI_MAX_ERROR_STRING];

    MTest_Init(&argc, &argv);

    MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_RETURN );

    rc = MPI_Testall( count, reqs, &flag, status_array );
    if (rc != MPI_SUCCESS) {
	MPI_Error_string( rc, errmsg, &errlen );
	printf( "MPI_Testall returned failure: %s\n", errmsg );
	errs ++;
    }
    else if (!flag) {
	printf( "MPI_Testall( 0, ... ) did not return a true flag\n") ;
	errs++;
    }

    rc = MPI_Waitall( count, reqs, status_array );
    if (rc != MPI_SUCCESS) {
	MPI_Error_string( rc, errmsg, &errlen );
	printf( "MPI_Waitall returned failure: %s\n", errmsg );
	errs ++;
    }

    rc = MPI_Testany( count, reqs, &idx, &flag, &status );
    if (rc != MPI_SUCCESS) {
	MPI_Error_string( rc, errmsg, &errlen );
	printf( "MPI_Testany returned failure: %s\n", errmsg );
	errs ++;
    }
    else if (!flag) {
	printf( "MPI_Testany( 0, ... ) did not return a true flag\n") ;
	errs++;
    }

    rc = MPI_Waitany( count, reqs, &idx, &status );
    if (rc != MPI_SUCCESS) {
	MPI_Error_string( rc, errmsg, &errlen );
	printf( "MPI_Waitany returned failure: %s\n", errmsg );
	errs ++;
    }

    rc = MPI_Testsome( count, reqs, &outcnt, indices, status_array );
    if (rc != MPI_SUCCESS) {
	MPI_Error_string( rc, errmsg, &errlen );
	printf( "MPI_Testsome returned failure: %s\n", errmsg );
	errs ++;
    }

    rc = MPI_Waitsome( count, reqs, &outcnt, indices, status_array );
    if (rc != MPI_SUCCESS) {
	MPI_Error_string( rc, errmsg, &errlen );
	printf( "MPI_Waitsome returned failure: %s\n", errmsg );
	errs ++;
    }
    
    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
Example #8
void  boundary_conditions(double ****f)
{
   int i, j, k, l, req_numS=0, req_numR=0;
   int r1,r2,z1,z2;
   int flag,cnt,z[7],ztemp,tag=10;
   double vrho,vphi,vth,An;
   char msg_err[100];       //for putlog+mpi_error
   int reslen;

   z[0]=1; z[1]=z[2]=z[3]=-1; //  affects the type of boundary conditions (-1: rigid, 1: free)
   z[4]=-1; z[5]=1;          // rather for Anorm&Atau not Ar,Aphi,Az

  /*============================ divertor =================================*/
  if(t_cur>100)                  // divertors are off till t sec
  if(n[2]==0)
  for(i=0;i<m1;i++)
    for(j=ghost;j<=ghost;j++)
      for(k=0;k<m3;k++)
         {
         vrho = f[3][i][j][k]*costh[i][k]+f[1][i][j][k]*sinth[i][k];
         vth  = -f[3][i][j][k]*sinth[i][k]+f[1][i][j][k]*costh[i][k];
         vphi = sqrt(pow(f[2][i][j][k],2)+pow(vth,2));                    //=sqrt(vfi^2+vth^2)
         f[1][i][j][k] = vrho*sinth[i][k]+vphi*costh[i][k]*sin(chi[i][k]);
         f[2][i][j][k] = vphi*cos(chi[i][k]);
         f[3][i][j][k] = vrho*costh[i][k]-vphi*sinth[i][k]*sin(chi[i][k]);
         }

  /*-------------------------------- exchanging of ghosts -------------------------------------*/
// exchanging in phi-direction - periodic directions first
 if(pr_neighbour[2]>-1)
  if(pr_neighbour[2]==rank) CopyGridToBuffer(f,nut,buf_recv[2],0,n2,0,m1-1,mm2-1,m3-1);
         else { CopyGridToBuffer(f,nut,buf_send[2],0,ghost,0,m1-1,2*ghost-1,m3-1);
                MPI_Isend(buf_send[2],buf_size[1],MPI_DOUBLE,pr_neighbour[2],tag+2,MPI_COMM_WORLD,&SendRequest[req_numS++]);
                MPI_Irecv(buf_recv[2],buf_size[1],MPI_DOUBLE,pr_neighbour[2],tag+3,MPI_COMM_WORLD,&RecvRequest[req_numR++]);
                }
 if(pr_neighbour[3]>-1)
  if(pr_neighbour[3]==rank) CopyGridToBuffer(f,nut,buf_recv[3],0,ghost,0,m1-1,2*ghost-1,m3-1);
         else { CopyGridToBuffer(f,nut,buf_send[3],0,n2,0,m1-1,mm2-1,m3-1);
                MPI_Isend(buf_send[3],buf_size[1],MPI_DOUBLE,pr_neighbour[3],tag+3,MPI_COMM_WORLD,&SendRequest[req_numS++]);
                MPI_Irecv(buf_recv[3],buf_size[1],MPI_DOUBLE,pr_neighbour[3],tag+2,MPI_COMM_WORLD,&RecvRequest[req_numR++]);
                }
   MPI_Waitall(req_numR,RecvRequest,statuses);
   if(statuses[0].MPI_ERROR) {putlog("bc:error during transfer=",numlog++);
                               MPI_Error_string(statuses[0].MPI_ERROR,msg_err,&reslen);
                               msg_err[reslen++] = ','; msg_err[reslen]= 0;
                               putlog(msg_err,numlog++);
                               }    else numlog++;
  if(pr_neighbour[2]>-1) CopyBufferToGrid(f,nut,buf_recv[2],0,0,0,m1-1,ghost-1,m3-1);
  if(pr_neighbour[3]>-1) CopyBufferToGrid(f,nut,buf_recv[3],0,mm2,0,m1-1,m2-1,m3-1);

// exchanging in r-direction
 if(pr_neighbour[0]>-1)
  if(pr_neighbour[0]==rank) CopyGridToBuffer(f,nut,buf_recv[0],n1,0,0,mm1-1,m2-1,m3-1);
         else { CopyGridToBuffer(f,nut,buf_send[0],ghost,0,0,2*ghost-1,m2-1,m3-1);
                MPI_Isend(buf_send[0],buf_size[0],MPI_DOUBLE,pr_neighbour[0],tag,MPI_COMM_WORLD,&SendRequest[req_numS++]);
                MPI_Irecv(buf_recv[0],buf_size[0],MPI_DOUBLE,pr_neighbour[0],tag+1,MPI_COMM_WORLD,&RecvRequest[req_numR++]);
                }
 if(pr_neighbour[1]>-1)
  if(pr_neighbour[1]==rank) CopyGridToBuffer(f,nut,buf_recv[1],ghost,0,0,2*ghost-1,m2-1,m3-1);
         else { CopyGridToBuffer(f,nut,buf_send[1],n1,0,0,mm1-1,m2-1,m3-1);
                MPI_Isend(buf_send[1],buf_size[0],MPI_DOUBLE,pr_neighbour[1],tag+1,MPI_COMM_WORLD,&SendRequest[req_numS++]);
                MPI_Irecv(buf_recv[1],buf_size[0],MPI_DOUBLE,pr_neighbour[1],tag,MPI_COMM_WORLD,&RecvRequest[req_numR++]);
                }
  MPI_Waitall(req_numR,RecvRequest,statuses);
  if(statuses[0].MPI_ERROR) {putlog("bc:error during transfer=",numlog++);
                               MPI_Error_string(statuses[0].MPI_ERROR,msg_err,&reslen);
                               msg_err[reslen++] = ','; msg_err[reslen]= 0;
                               putlog(msg_err,numlog++);
                               }    else numlog++;
  if(pr_neighbour[0]>-1) CopyBufferToGrid(f,nut,buf_recv[0],0,0,0,ghost-1,m2-1,m3-1);
  if(pr_neighbour[1]>-1) CopyBufferToGrid(f,nut,buf_recv[1],mm1,0,0,m1-1,m2-1,m3-1);

// exchanging in z-direction
 if(pr_neighbour[4]>-1)
  if(pr_neighbour[4]==rank) CopyGridToBuffer(f,nut,buf_recv[4],0,0,mm3,m1-1,m2-1,m3-1);
         else { CopyGridToBuffer(f,nut,buf_send[4],0,0,ghost,m1-1,m2-1,2*ghost-1);
                MPI_Isend(buf_send[4],buf_size[2],MPI_DOUBLE,pr_neighbour[4],tag+4,MPI_COMM_WORLD,&SendRequest[req_numS++]);
                MPI_Irecv(buf_recv[4],buf_size[2],MPI_DOUBLE,pr_neighbour[4],tag+5,MPI_COMM_WORLD,&RecvRequest[req_numR++]);
                }
 if(pr_neighbour[5]>-1)
  if(pr_neighbour[5]==rank) CopyGridToBuffer(f,nut,buf_recv[5],0,0,0,m1-1,m2-1,ghost-1);
        else { CopyGridToBuffer(f,nut,buf_send[5],0,0,n3,m1-1,m2-1,mm3-1);
               MPI_Isend(buf_send[5],buf_size[2],MPI_DOUBLE,pr_neighbour[5],tag+5,MPI_COMM_WORLD,&SendRequest[req_numS++]);
               MPI_Irecv(buf_recv[5],buf_size[2],MPI_DOUBLE,pr_neighbour[5],tag+4,MPI_COMM_WORLD,&RecvRequest[req_numR++]);
               }
  MPI_Waitall(req_numR,RecvRequest,statuses);
  if(statuses[0].MPI_ERROR) {putlog("bc:error during transfer=",numlog++);
                               MPI_Error_string(statuses[0].MPI_ERROR,msg_err,&reslen);
                               msg_err[reslen++] = ','; msg_err[reslen]= 0;
                               putlog(msg_err,numlog++);
                               }    else numlog++;
  if(pr_neighbour[4]>-1) CopyBufferToGrid(f,nut,buf_recv[4],0,0,0,m1-1,m2-1,ghost-1);
  if(pr_neighbour[5]>-1) CopyBufferToGrid(f,nut,buf_recv[5],0,0,mm3,m1-1,m2-1,m3-1);

//    MPI_Barrier(MPI_COMM_WORLD);
//    MPI_Startall(req_numR,RecvRequest);
//    MPI_Iprobe(MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&flag,statuses);
//    if(flag==0) putlog("bc:error during transfer=",numlog++);
//    MPI_Testall(req_numR,RecvRequest,&flag,statuses);
//    MPI_Get_count(statuses,MPI_DOUBLE,&cnt);
//    MPI_Waitall(req_numR,RecvRequest,statuses);

/*----------------------- filling of ghost nodes ------------------------------*/

  for(i=0;i<m1;i++)
     for(j=0;j<m2;j++)
        for(k=0;k<m3;k++)
        {
          if(isType(node[i][k],NodeGhostFluid)) {
                r2=(r1=floor(refr_f[i][k]))+1;
                z2=(z1=floor(refz_f[i][k]))+1;
                for(l=0;l<=3;l++)
                   f[l][i][j][k] = z[l]*( (refr_f[i][k]-r2)*(f[l][r1][j][z1]*(refz_f[i][k]-z2)-f[l][r1][j][z2]*(refz_f[i][k]-z1))
                                         +(refr_f[i][k]-r1)*(f[l][r2][j][z2]*(refz_f[i][k]-z1)-f[l][r2][j][z1]*(refz_f[i][k]-z2))
                                        );
                   nut[i][j][k] = (refr_f[i][k]-r2)*(nut[r1][j][z1]*(refz_f[i][k]-z2)-nut[r1][j][z2]*(refz_f[i][k]-z1))
                                + (refr_f[i][k]-r1)*(nut[r2][j][z2]*(refz_f[i][k]-z1)-nut[r2][j][z1]*(refz_f[i][k]-z2));
                }
          if(isType(node[i][k],NodeGhostMagn)) {
                r1=floor(refr_m[i][k]+0.5);
                z1=floor(refz_m[i][k]+0.5);
                An = ( f[4][r1][j][z1]*(r1-i)+f[6][r1][j][z1]*(z1-k) )/
                     ( (r1-i)*(r1-i) + (z1-k)*(z1-k) );
/*                for(l=4;l<=6;l++)
                   f[l][i][j][k] = z[l]*f[l][r1][j][z1];*/
//                if((r1-i)*(z1-k)) ztemp = z[5]; else ztemp = z[4];
                ztemp = ((r1-i)*(z1-k)==0) ? z[5] : z[4];
                ztemp = z[5];
                f[4][i][j][k] = ztemp*f[4][r1][j][z1] + (z[4]-ztemp)*An*(r1-i);
                f[5][i][j][k] = z[5]*f[5][r1][j][z1] * (2*(rc-R)-(i-r1)*dx[0]) / (2*(rc-R)+(i-r1)*dx[0]);
                f[6][i][j][k] = ztemp*f[6][r1][j][z1] + (z[4]-ztemp)*An*(z1-k);
                }
        }

  return;
}
Example #9
void parallelComm::sendRecvPacketsV(std::vector<VPACKET> &sndPack, std::vector<VPACKET> &rcvPack)
{
  std::vector<int> scount(2*nsend);
  std::vector<int> rcount(2*nrecv);
  std::vector<MPI_Request> request(2*(nsend+nrecv));
  std::vector<MPI_Status> status(2*(nsend+nrecv));

  for (int i = 0; i < nsend; i++) {
    scount[2*i] = sndPack[i].nints;
    scount[2*i+1] = sndPack[i].nreals;
  }

  int irnum = 0;

  for (int i = 0; i < nrecv; i++)
    MPI_Irecv(&(rcount[2*i]),2,MPI_INT,rcvMap[i],10,scomm,&request[irnum++]);

  for (int i = 0; i < nsend; i++)
    MPI_Isend(&(scount[2*i]),2,MPI_INT,sndMap[i],10,scomm,&request[irnum++]);

  MPI_Waitall(irnum,request.data(),status.data());

  for (int i = 0; i < nrecv; i++)
  {
    rcvPack[i].nints = rcount[2*i];
    rcvPack[i].nreals = rcount[2*i+1];
  }

  irnum = 0;
  for (int i = 0; i < nrecv; i++)
  {
    if (rcvPack[i].nints > 0)
    {
      rcvPack[i].intData.resize(rcvPack[i].nints);
      MPI_Irecv(rcvPack[i].intData.data(),rcvPack[i].nints,MPI_INT,
                rcvMap[i],10,scomm,&request[irnum++]);
    }

    if (rcvPack[i].nreals > 0)
    {
      rcvPack[i].realData.resize(rcvPack[i].nreals);
      MPI_Irecv(rcvPack[i].realData.data(),rcvPack[i].nreals,MPI_DOUBLE,
                rcvMap[i],20,scomm,&request[irnum++]);
    }
  }

  for (int i = 0; i < nsend;i++)
  {
    if (sndPack[i].nints > 0)
    {
      MPI_Isend(sndPack[i].intData.data(),sndPack[i].nints,MPI_INT,
                sndMap[i],10,scomm,&request[irnum++]);
    }
    if (sndPack[i].nreals > 0)
    {
      MPI_Isend(sndPack[i].realData.data(),sndPack[i].nreals,MPI_DOUBLE,
                sndMap[i],20,scomm,&request[irnum++]);
    }
  }
  MPI_Waitall(irnum,request.data(),status.data());
}
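
sendRecvPacketsV is a two-phase exchange: the int/real counts travel first (Irecv/Isend/Waitall), the receive packets are resized, and only then do the payloads travel with the same pattern. A stripped-down C sketch of the same size-then-payload protocol, with a hypothetical function name and peer list:

#include <mpi.h>
#include <stdlib.h>

/* Sketch: exchange variable-length double payloads with each peer in
 * 'ranks' by first exchanging the element counts, then the data. */
void exchange_sizes_then_payload(int npeers, int *ranks,
                                 double **sendbuf, int *sendcnt,
                                 double **recvbuf, int *recvcnt,
                                 MPI_Comm comm)
{
  MPI_Request *req = malloc(2 * npeers * sizeof(MPI_Request));
  int i, nreq = 0;

  /* phase 1: exchange the counts */
  for (i = 0; i < npeers; i++)
    MPI_Irecv(&recvcnt[i], 1, MPI_INT, ranks[i], 10, comm, &req[nreq++]);
  for (i = 0; i < npeers; i++)
    MPI_Isend(&sendcnt[i], 1, MPI_INT, ranks[i], 10, comm, &req[nreq++]);
  MPI_Waitall(nreq, req, MPI_STATUSES_IGNORE);

  /* phase 2: size the receive buffers, then exchange the payloads */
  nreq = 0;
  for (i = 0; i < npeers; i++) {
    recvbuf[i] = malloc(recvcnt[i] * sizeof(double));
    MPI_Irecv(recvbuf[i], recvcnt[i], MPI_DOUBLE, ranks[i], 20, comm, &req[nreq++]);
  }
  for (i = 0; i < npeers; i++)
    MPI_Isend(sendbuf[i], sendcnt[i], MPI_DOUBLE, ranks[i], 20, comm, &req[nreq++]);
  MPI_Waitall(nreq, req, MPI_STATUSES_IGNORE);

  free(req);
}
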
Example #10
int main(int argc, char *argv[]) {

  int rank;
  int size;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  
  // Build a 4x3 grid of subgrids
  /* 
     +-----+-----+-----+
     |  0  |  1  |  2  |
     |(0,0)|(0,1)|(0,2)|
     +-----+-----+-----+
     |  3  |  4  |  5  |
     |(1,0)|(1,1)|(1,2)|
     +-----+-----+-----+
     |  6  |  7  |  8  |
     |(2,0)|(2,1)|(2,2)|
     +-----+-----+-----+
     |  9  |  10 |  11 |
     |(3,0)|(3,1)|(3,2)|
     +-----+-----+-----+
   */

  MPI_Comm Comm2d;
  int ndim;
  int dim[2];
  int period[2]; // for periodic conditions
  int reorder;
  int coord[2];
  
  // Setup and build cartesian grid
  ndim=2; dim[0]=4; dim[1]=3; period[0]=false; period[1]=false; reorder=true;
  MPI_Cart_create(MPI_COMM_WORLD,ndim,dim,period,reorder,&Comm2d);

  // Every processor prints its rank and coordinates
  MPI_Cart_coords(Comm2d,rank,2,coord);
  printf("P:%2d My coordinates are %d %d\n",rank,coord[0],coord[1]);

  // On the root rank: query the rank of the processor at coordinates (3,1)
  if(rank==0) {
    int id; // the requested processor id
    coord[0]=3;
    coord[1]=1;
    MPI_Cart_rank(Comm2d,coord,&id);
    printf("The processor at coords (%d, %d) has rank %d\n",coord[0],coord[1],id);
  }

  // Every processor builds its neighbour map
  int nbrs[4];
  MPI_Cart_shift(Comm2d,0,1,&nbrs[UP],&nbrs[DOWN]);
  MPI_Cart_shift(Comm2d,1,1,&nbrs[LEFT],&nbrs[RIGHT]);
   
  // prints its neighbours
  if (rank==7) {
    printf("P:%2d has neighbours (u,d,l,r): %2d %2d %2d %2d\n",
	   rank,nbrs[UP],nbrs[DOWN],nbrs[LEFT],nbrs[RIGHT]);
  } 
  // if everything looks good up to here, I'll perform a communication test.
  MPI_Barrier(MPI_COMM_WORLD);

  // Making a communication test
  MPI_Request reqSendRecv[8]; // every processor sends 4 INTs and receives 4 INTs
  MPI_Status status[8];

  int out = rank; // communicate the rank number
  int in[4] = {}; // empty array
  int tag = 2; // tag

  for (int i = 0; i < 4; i++) { // following the neighbours order!!
    MPI_Isend( &out ,1,MPI_INT,nbrs[i],tag,MPI_COMM_WORLD,&reqSendRecv[ i ]);
    MPI_Irecv(&in[i],1,MPI_INT,nbrs[i],tag,MPI_COMM_WORLD,&reqSendRecv[i+4]);
  }
  MPI_Waitall(8,reqSendRecv,status);

  // print the communication output
  printf("P:%2d received from ngbr(u,d,l,r): %2d %2d %2d %2d\n",
	   rank,in[UP],in[DOWN],in[LEFT],in[RIGHT]);

  MPI_Finalize();

  return 0;
}
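
Since period[0] and period[1] are false, MPI_Cart_shift returns MPI_PROC_NULL for the missing neighbours at the grid border, and communication with MPI_PROC_NULL completes immediately; that is why the unconditional MPI_Waitall(8, ...) above is safe at the edges. A minimal sketch of that behaviour, independent of the grid code above:

#include <mpi.h>
#include <stdio.h>

/* Sketch: a receive posted against MPI_PROC_NULL completes at once;
 * the status reports source MPI_PROC_NULL, tag MPI_ANY_TAG and count 0. */
int main(int argc, char **argv)
{
  int dummy = 0;
  MPI_Request req;
  MPI_Status st;

  MPI_Init(&argc, &argv);
  MPI_Irecv(&dummy, 1, MPI_INT, MPI_PROC_NULL, 0, MPI_COMM_WORLD, &req);
  MPI_Waitall(1, &req, &st);   /* completes without any matching send */
  printf("source=%d tag=%d\n", st.MPI_SOURCE, st.MPI_TAG);
  MPI_Finalize();
  return 0;
}
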
Example #11
int main( int argc, char *argv[] )
{
    int errs = 0;
    int rank, size, dest, source;
    int i, indices[40];
    MPI_Aint extent;
    int *buf, *bufs[MAX_MSGS];
    MPI_Comm      comm;
    MPI_Datatype  dtype;
    MPI_Request   req[MAX_MSGS];

    MTest_Init( &argc, &argv );

    comm = MPI_COMM_WORLD;
    MPI_Comm_rank( comm, &rank );
    MPI_Comm_size( comm, &size );
    source = 0;
    dest   = size - 1;
    
    /* Setup by creating a blocked datatype that is likely to be processed
       in a piecemeal fashion */
    for (i=0; i<30; i++) {
	indices[i] = i*40;
    }

    /* 30 blocks of size 10 */
    MPI_Type_create_indexed_block( 30, 10, indices, MPI_INT, &dtype );
    MPI_Type_commit( &dtype );
    
    /* Create the corresponding message buffers */
    MPI_Type_extent( dtype, &extent );
    for (i=0; i<MAX_MSGS; i++) {
	bufs[i] = (int *)malloc( extent );
	if (!bufs[i]) {
	    fprintf( stderr, "Unable to allocate buffer %d of size %ld\n", 
		    	i, (long)extent );
	    MPI_Abort( MPI_COMM_WORLD, 1 );
	}
    }
    buf = (int *)malloc( 10 * 30 * sizeof(int) );
    
    MPI_Barrier( MPI_COMM_WORLD );
    if (rank == dest) {
	MTestSleep( 2 );
	for (i=0; i<MAX_MSGS; i++) {
	    MPI_Recv( buf, 10*30, MPI_INT, source, i, comm, 
		      MPI_STATUS_IGNORE );
	}
    }
    else if (rank == source ) {
	for (i=0; i<MAX_MSGS; i++) {
	    MPI_Isend( bufs[i], 1, dtype, dest, i, comm, &req[i] );
	}
	MPI_Waitall( MAX_MSGS, req, MPI_STATUSES_IGNORE );
    }

    MPI_Type_free( &dtype );
    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
Example #12
int main( int argc, char **argv )
{
    MPI_Comm comm;
    MPI_Request r[MAX_REQ];
    MPI_Status  s[MAX_REQ];
    int msgsize, maxmsg, root, i, j, size, rank, err = 0, msgcnt, toterr;
    int *sbuf, *rbuf;

    MPI_Init( &argc, &argv );
    
    comm = MPI_COMM_WORLD;

    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );

    if (size < 2) {
	printf( "This test requires at least 2 processors\n" );
	MPI_Abort( comm, 1 );
    }

    /* First, try large blocking sends to root */
    root = 0;
    
    maxmsg =  MAX_MSG;
    msgsize = 128;
    msgcnt  = MAX_MSG_CNT;
    if (rank == root && verbose) printf( "Blocking sends: " );
    while (msgsize <= maxmsg) {
	if (rank == root) {
	    if (verbose) { printf( "%d ", msgsize ); fflush( stdout ); }
	    rbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!rbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    for (i=0; i<size; i++) {
		if (i == rank) continue;
		for (j=0; j<msgcnt; j++) {
		    SetupRdata( rbuf, msgsize );
		    MPI_Recv( rbuf, msgsize, MPI_INT, i, 2*i, comm, s );
		    err += CheckData( rbuf, msgsize, 2*i, s );
		}
	    }
	    free( rbuf );
	}
	else {
	    sbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!sbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    SetupData( sbuf, msgsize, 2*rank );
	    for (j=0; j<msgcnt; j++) 
		MPI_Send( sbuf, msgsize, MPI_INT, root, 2*rank, comm );
	    free( sbuf );
	}
	msgsize *= 4;
    }
    if (rank == 0 && verbose) { printf( "\n" ); fflush( stdout ); }

    /* Next, try unexpected messages with Isends */
    msgsize = 128;
    maxmsg  = MAX_MSG;
    msgcnt  = MAX_REQ;
    if (rank == root && verbose) printf( "Unexpected recvs: " );
    while (msgsize <= maxmsg) {
	if (rank == root) {
	    if (verbose) { printf( "%d ", msgsize ); fflush( stdout ); }
	    rbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!rbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    MPI_Barrier( comm );
	    for (i=0; i<size; i++) {
		if (i == rank) continue;
		for (j=0; j<msgcnt; j++) {
		    SetupRdata( rbuf, msgsize );
		    MPI_Recv( rbuf, msgsize, MPI_INT, i, 2*i, comm, s );
		    err += CheckData( rbuf, msgsize, 2*i, s );
		}
	    }
	    free( rbuf );
	}
	else {
	    sbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!sbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    SetupData( sbuf, msgsize, 2*rank );
	    for (j=0; j<msgcnt; j++) {
		MPI_Isend( sbuf, msgsize, MPI_INT, root, 2*rank, comm, &r[j] );
	    }
	    MPI_Barrier( comm );
	    MPI_Waitall( msgcnt, r, s );
	    free( sbuf );
	}
	msgsize *= 4;
    }
    if (rank == 0 && verbose) { printf( "\n" ); fflush( stdout ); }

    /* Try large synchronous blocking sends to root */
    root = 0;
    
    msgsize = 128;
    maxmsg  = MAX_MSG;
    if (rank == root && verbose) printf( "Synchronous sends: " );
    while (msgsize <= maxmsg) {
	if (rank == root) {
	    if (verbose) { printf( "%d ", msgsize ); fflush( stdout ); }
	    rbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!rbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    for (i=0; i<size; i++) {
		if (i == rank) continue;
		for (j=0; j<msgcnt; j++) {
		    SetupRdata( rbuf, msgsize );
		    MPI_Recv( rbuf, msgsize, MPI_INT, i, 2*i, comm, s );
		    err += CheckData( rbuf, msgsize, 2*i, s );
		}
	    }
	    free( rbuf );
	}
	else {
	    sbuf = (int *)malloc( msgsize * sizeof(int) );
	    if (!sbuf) {
		printf( "Could not allocate %d words\n", msgsize );
		MPI_Abort( comm, 1 );
	    }
	    SetupData( sbuf, msgsize, 2*rank );
	    for (j=0; j<msgcnt; j++) 
		MPI_Ssend( sbuf, msgsize, MPI_INT, root, 2*rank, comm );
	    free( sbuf );
	}
	msgsize *= 4;
    }
    if (rank == 0 && verbose) { printf( "\n" ); fflush( stdout ); }

    MPI_Allreduce( &err, &toterr, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    
    if (rank == 0) {
	if (toterr == 0) printf( "No errors\n" );
	else printf( "!! found %d errors\n", toterr );
    }
    if (toterr) {
	printf( "!! found %d errors on processor %d\n", err, rank );
    }

    MPI_Finalize( );
    return 0;
}
Example #13
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  char processor_name[128];
  int namelen = 128;
  int buf0[buf_size];
  int buf1[buf_size];
  MPI_Request aReq[2];
  MPI_Status aStatus[2];

  MPI_Status status;

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Barrier (MPI_COMM_WORLD);

  if (nprocs < 2) {
      printf ("not enough tasks\n");
  }
  else {
    if (rank == 0) {
      memset (buf0, 0, buf_size);

      MPI_Send_init (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[0]);
      MPI_Recv_init (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[1]);

      MPI_Start (&aReq[0]);
      MPI_Start (&aReq[1]);

      MPI_Waitall (2, aReq, aStatus);

      memset (buf0, 1, buf_size);

      MPI_Startall (2, aReq);

      MPI_Waitall (2, aReq, aStatus);
    }
    else if (rank == 1) {
      memset (buf1, 1, buf_size);

      MPI_Recv_init (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[0]);
      MPI_Send_init (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[1]);

      MPI_Start (&aReq[0]);
      MPI_Start (&aReq[1]);

      MPI_Waitall (2, aReq, aStatus);

      memset (buf1, 0, buf_size);

      MPI_Startall (2, aReq);

      MPI_Waitall (2, aReq, aStatus);
    }
  }

  MPI_Barrier (MPI_COMM_WORLD);

  MPI_Request_free (&aReq[0]);
  MPI_Request_free (&aReq[1]);

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
}
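
The example above exercises persistent requests: MPI_Send_init/MPI_Recv_init once, MPI_Start or MPI_Startall plus MPI_Waitall per round, and MPI_Request_free at the end. A compact sketch of that lifecycle (the peer rank, message size and iteration count are placeholders):

#include <mpi.h>
#include <string.h>

/* Sketch: persistent-request lifecycle -- set up once, restart and wait
 * each iteration, free when done. */
void persistent_exchange(int peer, int iterations, MPI_Comm comm)
{
  enum { N = 1024 };
  int sbuf[N], rbuf[N], it;
  MPI_Request req[2];

  memset(sbuf, 0, sizeof(sbuf));
  MPI_Send_init(sbuf, N, MPI_INT, peer, 0, comm, &req[0]);
  MPI_Recv_init(rbuf, N, MPI_INT, peer, 0, comm, &req[1]);

  for (it = 0; it < iterations; it++) {
    MPI_Startall(2, req);                      /* reactivate both requests */
    MPI_Waitall(2, req, MPI_STATUSES_IGNORE);  /* requests become inactive, not freed */
  }

  MPI_Request_free(&req[0]);
  MPI_Request_free(&req[1]);
}
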
Example #14
double find_bibw(int size, int num_pairs, char *s_buf, char *r_buf)
{
/*This function is the bandwidth test that was previously part of main in
  osu_bibw, with the additional modification of being able to stream
  multiple processors per node.  As of the InfiniPath 1.3 release
  the code can also dynamically determine which processes are on which
  node and set things up appropriately.*/
  double t_start = 0.0, t_end = 0.0, t = 0.0, max_time = 0.0, min_time = 0.0;
  double seconds_per_message_size, sum_loops, dloops;
  int i, j, myid, target, skip, loops, min_loops, max_loops, window_size;
  for (i = 0; i < size; i++)
  {
    s_buf[i] = 'a';
    r_buf[i] = 'b';
  }

  if (size < large_message_size)
  {
    skip = skip_small;
    min_loops = min_loops_small;
    max_loops = max_loops_small;
    window_size = window_size_small;
    seconds_per_message_size = seconds_per_message_size_small;
  }
  else
  {
    skip = skip_large;
    min_loops = min_loops_large;
    max_loops = max_loops_large;
    window_size = window_size_large;
    seconds_per_message_size = seconds_per_message_size_large;
  }
  MPI_Comm_rank (MPI_COMM_WORLD, &myid);
  MPI_Barrier (MPI_COMM_WORLD);
  if (pair_list[myid].sender == myid)
  {
    target = pair_list[myid].receiver;
    for (i = 0; i < max_loops + skip; i++)
    {
      if (i == skip)
      {
        MPI_Barrier (MPI_COMM_WORLD);
        t_start = MPI_Wtime();
      }
      for (j = 0; j < window_size; j++)
      {
        MPI_Irecv (r_buf, size, MPI_CHAR, target, TAG_DATA,
                  MPI_COMM_WORLD, recv_request + j);
      }
      for (j = 0; j < window_size; j++)
      {
        MPI_Isend (s_buf, size, MPI_CHAR, target, TAG_DATA,
                  MPI_COMM_WORLD, send_request + j);
      }
      MPI_Waitall (window_size, send_request, reqstat);
      MPI_Waitall (window_size, recv_request, reqstat);
      MPI_Recv (r_buf, 4, MPI_CHAR, target, MPI_ANY_TAG, MPI_COMM_WORLD,
               &reqstat[0]);
      if (reqstat[0].MPI_TAG == TAG_DONE)
      {
        t_end = MPI_Wtime();
	i++;
	break;
      }
    }
    if (t_end == 0.0)
    {
      t_end = MPI_Wtime();
    }
    loops = i - skip;
    t = t_end - t_start;
  }
  else if (pair_list[myid].receiver == myid)
  {
    int tag = TAG_SKIP;
    target = pair_list[myid].sender;
    for (i = 0; i < max_loops + skip; i++)
    {
      if (i == skip)
      {
	tag = TAG_LOOP;
        MPI_Barrier (MPI_COMM_WORLD);
        t_start = MPI_Wtime();
      }
      for (j = 0; j < window_size; j++)
      {
        MPI_Isend (s_buf, size, MPI_CHAR, target, TAG_DATA,
                  MPI_COMM_WORLD, send_request + j);
      }
      for (j = 0; j < window_size; j++)
      {
        MPI_Irecv (r_buf, size, MPI_CHAR, target, TAG_DATA,
                  MPI_COMM_WORLD, recv_request + j);
      }
      MPI_Waitall (window_size, send_request, reqstat);
      MPI_Waitall (window_size, recv_request, reqstat);
      if (tag == TAG_LOOP &&
          (i - skip) >= (min_loops - 1) &&
          MPI_Wtime() - t_start >= seconds_per_message_size)
      {
        MPI_Send (s_buf, 4, MPI_CHAR, target, TAG_DONE, MPI_COMM_WORLD);
	i++;
	break;
      }
      else {
        MPI_Send (s_buf, 4, MPI_CHAR, target, tag, MPI_COMM_WORLD);
      }
    }
    loops = i - skip;
  }
  else
  {
    MPI_Barrier(MPI_COMM_WORLD);
  }
  MPI_Reduce (&t, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, mpi_comm_sender);
  MPI_Reduce (&t, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0, mpi_comm_sender);
  /* loops is a C int, so use MPI_INT (MPI_INTEGER is the Fortran integer type) */
  MPI_Reduce (&loops, &max_loops, 1, MPI_INT, MPI_MAX, 0, mpi_comm_sender);
  MPI_Reduce (&loops, &min_loops, 1, MPI_INT, MPI_MIN, 0, mpi_comm_sender);
  dloops = (double) loops;
  MPI_Reduce (&dloops, &sum_loops, 1, MPI_DOUBLE, MPI_SUM, 0, mpi_comm_sender);

  if (myid==0)
  {
    double mbytes = ( (size * 2.0) / (1000 * 1000) ) * sum_loops * window_size;
    double bw = mbytes / max_time;
    if (debug)
    {
      printf("%d bytes, %.2f MB/s, %d to %d loops (range %.2f%%), "
             "%.3f to %.3f secs (range %.2f%%)\n",
             size, bw,
             min_loops, max_loops,
             100.0 * ((double) max_loops / (double) min_loops - 1),
             min_time, max_time,
             100.0 * (max_time / min_time - 1));
    }
    return bw;
  }
  return 0;
}
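
A note on the arithmetic at the end of find_bibw: the reported figure is (2 x message size x completed loops x window size) / elapsed time, in MB/s with 1 MB taken as 10^6 bytes. A minimal stand-alone sketch of that calculation follows; the helper name bibw_mbps and the sample numbers are illustrative, not part of osu_bibw.

#include <stdio.h>

/* Illustrative helper (not part of osu_bibw): bidirectional bandwidth in
 * MB/s, mirroring the "mbytes / max_time" computation above.  The factor
 * 2.0 accounts for traffic flowing in both directions. */
static double bibw_mbps(int size, double loops, int window_size, double seconds)
{
    double mbytes = ((size * 2.0) / (1000.0 * 1000.0)) * loops * window_size;
    return mbytes / seconds;
}

int main(void)
{
    /* hypothetical run: 64 KiB messages, 100 loops, window of 64, 0.5 s */
    printf("%.2f MB/s\n", bibw_mbps(65536, 100.0, 64, 0.5));
    return 0;
}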
Exemple #15
0
/*! \brief Called by PME-only ranks to receive coefficients and coordinates
 *
 * \param[in,out] pme_pp         PME-PP communication structure.
 * \param[out] natoms            Number of received atoms.
 * \param[out] box               System box, if received.
 * \param[out] maxshift_x        Maximum shift in X direction, if received.
 * \param[out] maxshift_y        Maximum shift in Y direction, if received.
 * \param[out] lambda_q          Free-energy lambda for electrostatics, if received.
 * \param[out] lambda_lj         Free-energy lambda for Lennard-Jones, if received.
 * \param[out] bEnerVir          Set to true if this is an energy/virial calculation step, otherwise set to false.
 * \param[out] step              MD integration step number.
 * \param[out] grid_size         PME grid size, if received.
 * \param[out] ewaldcoeff_q      Ewald cut-off parameter for electrostatics, if received.
 * \param[out] ewaldcoeff_lj     Ewald cut-off parameter for Lennard-Jones, if received.
 * \param[out] atomSetChanged    Set to true only if the local domain atom data (charges/coefficients)
 *                               has been received (after DD) and should be reinitialized. Otherwise not changed.
 *
 * \retval pmerecvqxX             All parameters were set, chargeA and chargeB can be NULL.
 * \retval pmerecvqxFINISH        No parameters were set.
 * \retval pmerecvqxSWITCHGRID    Only grid_size and *ewaldcoeff were set.
 * \retval pmerecvqxRESETCOUNTERS *step was set.
 */
static int gmx_pme_recv_coeffs_coords(gmx_pme_pp        *pme_pp,
                                      int               *natoms,
                                      matrix             box,
                                      int               *maxshift_x,
                                      int               *maxshift_y,
                                      real              *lambda_q,
                                      real              *lambda_lj,
                                      gmx_bool          *bEnerVir,
                                      int64_t           *step,
                                      ivec              *grid_size,
                                      real              *ewaldcoeff_q,
                                      real              *ewaldcoeff_lj,
                                      bool              *atomSetChanged)
{
    int status = -1;
    int nat    = 0;

#if GMX_MPI
    unsigned int flags    = 0;
    int          messages = 0;

    do
    {
        gmx_pme_comm_n_box_t cnb;
        cnb.flags = 0;

        /* Receive the send count, box and time step from the peer PP node */
        MPI_Recv(&cnb, sizeof(cnb), MPI_BYTE,
                 pme_pp->peerRankId, eCommType_CNB,
                 pme_pp->mpi_comm_mysim, MPI_STATUS_IGNORE);

        /* We accumulate all received flags */
        flags |= cnb.flags;

        *step  = cnb.step;

        if (debug)
        {
            fprintf(debug, "PME only rank receiving:%s%s%s%s%s\n",
                    (cnb.flags & PP_PME_CHARGE)        ? " charges" : "",
                    (cnb.flags & PP_PME_COORD )        ? " coordinates" : "",
                    (cnb.flags & PP_PME_FINISH)        ? " finish" : "",
                    (cnb.flags & PP_PME_SWITCHGRID)    ? " switch grid" : "",
                    (cnb.flags & PP_PME_RESETCOUNTERS) ? " reset counters" : "");
        }

        if (cnb.flags & PP_PME_FINISH)
        {
            status = pmerecvqxFINISH;
        }

        if (cnb.flags & PP_PME_SWITCHGRID)
        {
            /* Special case, receive the new parameters and return */
            copy_ivec(cnb.grid_size, *grid_size);
            *ewaldcoeff_q  = cnb.ewaldcoeff_q;
            *ewaldcoeff_lj = cnb.ewaldcoeff_lj;

            status         = pmerecvqxSWITCHGRID;
        }

        if (cnb.flags & PP_PME_RESETCOUNTERS)
        {
            /* Special case, receive the step (set above) and return */
            status = pmerecvqxRESETCOUNTERS;
        }

        if (cnb.flags & (PP_PME_CHARGE | PP_PME_SQRTC6 | PP_PME_SIGMA))
        {
            *atomSetChanged = true;

            /* Receive the send counts from the other PP nodes */
            for (auto &sender : pme_pp->ppRanks)
            {
                if (sender.rankId == pme_pp->peerRankId)
                {
                    sender.numAtoms = cnb.natoms;
                }
                else
                {
                    MPI_Irecv(&sender.numAtoms, sizeof(sender.numAtoms),
                              MPI_BYTE,
                              sender.rankId, eCommType_CNB,
                              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
                }
            }
            MPI_Waitall(messages, pme_pp->req.data(), pme_pp->stat.data());
            messages = 0;

            nat = 0;
            for (const auto &sender : pme_pp->ppRanks)
            {
                nat += sender.numAtoms;
            }

            if (cnb.flags & PP_PME_CHARGE)
            {
                pme_pp->chargeA.resizeWithPadding(nat);
            }
            if (cnb.flags & PP_PME_CHARGEB)
            {
                pme_pp->chargeB.resize(nat);
            }
            if (cnb.flags & PP_PME_SQRTC6)
            {
                pme_pp->sqrt_c6A.resize(nat);
            }
            if (cnb.flags & PP_PME_SQRTC6B)
            {
                pme_pp->sqrt_c6B.resize(nat);
            }
            if (cnb.flags & PP_PME_SIGMA)
            {
                pme_pp->sigmaA.resize(nat);
            }
            if (cnb.flags & PP_PME_SIGMAB)
            {
                pme_pp->sigmaB.resize(nat);
            }
            pme_pp->x.resizeWithPadding(nat);
            pme_pp->f.resize(nat);

            /* maxshift is sent when the charges are sent */
            *maxshift_x = cnb.maxshift_x;
            *maxshift_y = cnb.maxshift_y;

            /* Receive the charges in place */
            for (int q = 0; q < eCommType_NR; q++)
            {
                real *bufferPtr;

                if (!(cnb.flags & (PP_PME_CHARGE<<q)))
                {
                    continue;
                }
                switch (q)
                {
                    case eCommType_ChargeA: bufferPtr = pme_pp->chargeA.data();  break;
                    case eCommType_ChargeB: bufferPtr = pme_pp->chargeB.data();  break;
                    case eCommType_SQRTC6A: bufferPtr = pme_pp->sqrt_c6A.data(); break;
                    case eCommType_SQRTC6B: bufferPtr = pme_pp->sqrt_c6B.data(); break;
                    case eCommType_SigmaA:  bufferPtr = pme_pp->sigmaA.data();   break;
                    case eCommType_SigmaB:  bufferPtr = pme_pp->sigmaB.data();   break;
                    default: gmx_incons("Wrong eCommType");
                }
                nat = 0;
                for (const auto &sender : pme_pp->ppRanks)
                {
                    if (sender.numAtoms > 0)
                    {
                        MPI_Irecv(bufferPtr+nat,
                                  sender.numAtoms*sizeof(real),
                                  MPI_BYTE,
                                  sender.rankId, q,
                                  pme_pp->mpi_comm_mysim,
                                  &pme_pp->req[messages++]);
                        nat += sender.numAtoms;
                        if (debug)
                        {
                            fprintf(debug, "Received from PP rank %d: %d %s\n",
                                    sender.rankId, sender.numAtoms,
                                    (q == eCommType_ChargeA ||
                                     q == eCommType_ChargeB) ? "charges" : "params");
                        }
                    }
                }
            }
        }

        if (cnb.flags & PP_PME_COORD)
        {
            /* The box, FE flag and lambda are sent along with the coordinates
             *  */
            copy_mat(cnb.box, box);
            *lambda_q       = cnb.lambda_q;
            *lambda_lj      = cnb.lambda_lj;
            *bEnerVir       = ((cnb.flags & PP_PME_ENER_VIR) != 0u);
            *step           = cnb.step;

            /* Receive the coordinates in place */
            nat = 0;
            for (const auto &sender : pme_pp->ppRanks)
            {
                if (sender.numAtoms > 0)
                {
                    MPI_Irecv(pme_pp->x[nat],
                              sender.numAtoms*sizeof(rvec),
                              MPI_BYTE,
                              sender.rankId, eCommType_COORD,
                              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
                    nat += sender.numAtoms;
                    if (debug)
                    {
                        fprintf(debug, "Received from PP rank %d: %d "
                                "coordinates\n",
                                sender.rankId, sender.numAtoms);
                    }
                }
            }

            status = pmerecvqxX;
        }

        /* Wait for the coordinates and/or charges to arrive */
        MPI_Waitall(messages, pme_pp->req.data(), pme_pp->stat.data());
        messages = 0;
    }
    while (status == -1);
#else
    GMX_UNUSED_VALUE(pme_pp);
    GMX_UNUSED_VALUE(box);
    GMX_UNUSED_VALUE(maxshift_x);
    GMX_UNUSED_VALUE(maxshift_y);
    GMX_UNUSED_VALUE(lambda_q);
    GMX_UNUSED_VALUE(lambda_lj);
    GMX_UNUSED_VALUE(bEnerVir);
    GMX_UNUSED_VALUE(step);
    GMX_UNUSED_VALUE(grid_size);
    GMX_UNUSED_VALUE(ewaldcoeff_q);
    GMX_UNUSED_VALUE(ewaldcoeff_lj);
    GMX_UNUSED_VALUE(atomSetChanged);

    status = pmerecvqxX;
#endif

    if (status == pmerecvqxX)
    {
        *natoms   = nat;
    }

    return status;
}
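
The receive loops above follow a common pattern: learn each sender's count first, then post one MPI_Irecv per sender into a single buffer at a running offset and complete them all with one MPI_Waitall. A minimal, self-contained sketch of that pattern is below; it is not the GROMACS code, and the counts, tags and payload values are invented for illustration.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int mycount = rank + 1;                 /* each rank contributes rank+1 ints */

    if (rank == 0) {
        int *counts = malloc(size * sizeof(int));
        MPI_Request *req = malloc(2 * size * sizeof(MPI_Request));
        int nreq = 0;

        /* Phase 1: collect the per-sender counts. */
        counts[0] = mycount;
        for (int src = 1; src < size; src++)
            MPI_Irecv(&counts[src], 1, MPI_INT, src, 0, MPI_COMM_WORLD, &req[nreq++]);
        MPI_Waitall(nreq, req, MPI_STATUSES_IGNORE);
        nreq = 0;

        int total = 0;
        for (int src = 0; src < size; src++)
            total += counts[src];

        /* Phase 2: receive every payload in place at a running offset. */
        int *data = malloc(total * sizeof(int));
        int offset = 0;
        for (int src = 0; src < size; src++) {
            if (src == 0) {
                for (int i = 0; i < mycount; i++)
                    data[offset + i] = 0;   /* rank 0's own values go straight in */
            } else {
                MPI_Irecv(data + offset, counts[src], MPI_INT, src, 1,
                          MPI_COMM_WORLD, &req[nreq++]);
            }
            offset += counts[src];
        }
        MPI_Waitall(nreq, req, MPI_STATUSES_IGNORE);
        printf("rank 0 gathered %d ints from %d ranks\n", total, size);
        free(data); free(counts); free(req);
    } else {
        int *payload = malloc(mycount * sizeof(int));
        for (int i = 0; i < mycount; i++)
            payload[i] = rank;
        MPI_Send(&mycount, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
        MPI_Send(payload, mycount, MPI_INT, 0, 1, MPI_COMM_WORLD);
        free(payload);
    }

    MPI_Finalize();
    return 0;
}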
Exemple #16
0
void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack)
{
  int i;
  int *sint,*sreal,*rint,*rreal;
  int tag,irnum;
  MPI_Request *request;
  MPI_Status *status;
  //
  sint=(int *)malloc(sizeof(int)*numprocs);
  sreal=(int *) malloc(sizeof(int)*numprocs);
  rint=(int *)malloc(sizeof(int)*numprocs);
  rreal=(int *) malloc(sizeof(int)*numprocs);
  request=(MPI_Request *) malloc(sizeof(MPI_Request)*4*numprocs);
  status=(MPI_Status *) malloc(sizeof(MPI_Status)*4*numprocs);
  //
  for(i=0;i<numprocs;i++){
    sint[i]=sndPack[i].nints;			
    sreal[i]=sndPack[i].nreals;
  }
  //
  MPI_Alltoall(sint,1,MPI_INT,rint,1,MPI_INT,scomm);
  MPI_Alltoall(sreal,1,MPI_INT,rreal,1,MPI_INT,scomm);
  //
  for(i=0;i<numprocs;i++) {
    rcvPack[i].nints=rint[i];
    rcvPack[i].nreals=rreal[i];
  }
  //
  irnum=0;
  for(i=0;i<numprocs;i++)
    {
      if (rcvPack[i].nints > 0) {
	tag=1;
	rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints);
	MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints,
		  MPI_INT,i,
		  tag,scomm,&request[irnum++]);
      }
      if (rcvPack[i].nreals > 0) {
	tag=2;
	rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals);
	MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals,
		  MPI_DOUBLE,i,
		  tag,scomm,&request[irnum++]);
      }
    }
  for(i=0;i<numprocs;i++)
    {
      if (sndPack[i].nints > 0){
	tag=1;
	MPI_Isend(sndPack[i].intData,sndPack[i].nints,
		  MPI_INT,i,
		  tag,scomm,&request[irnum++]);
      }
      if (sndPack[i].nreals > 0){
	tag=2;
	MPI_Isend(sndPack[i].realData,sndPack[i].nreals,
		  MPI_DOUBLE,i,
		  tag,scomm,&request[irnum++]);
      }
    }
  MPI_Pcontrol(1, "tioga_pc_waitall");
  MPI_Waitall(irnum,request,status);
  MPI_Pcontrol(-1, "tioga_pc_waitall");

  free(sint);
  free(sreal);
  free(rint);
  free(rreal);
  free(request);
  free(status);
}
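
A compact sketch of the same two-phase exchange used above: MPI_Alltoall distributes the per-destination counts so receivers can size their buffers, then the variable-sized payloads move with MPI_Irecv/MPI_Isend and a single MPI_Waitall. The payload contents and the tag value are illustrative; this is not the parallelComm code itself.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int *sendcount = malloc(size * sizeof(int));
    int *recvcount = malloc(size * sizeof(int));
    for (int i = 0; i < size; i++)
        sendcount[i] = rank + 1;            /* this rank sends rank+1 ints to everyone */

    /* Step 1: exchange the counts so receivers can allocate buffers. */
    MPI_Alltoall(sendcount, 1, MPI_INT, recvcount, 1, MPI_INT, MPI_COMM_WORLD);

    /* Step 2: post all receives and sends, then wait once. */
    MPI_Request *req = malloc(2 * size * sizeof(MPI_Request));
    int **rbuf = malloc(size * sizeof(int *));
    int *sbuf = malloc((rank + 1) * sizeof(int));
    int nreq = 0;

    for (int i = 0; i < rank + 1; i++)
        sbuf[i] = rank;

    for (int i = 0; i < size; i++) {
        rbuf[i] = malloc(recvcount[i] * sizeof(int));
        MPI_Irecv(rbuf[i], recvcount[i], MPI_INT, i, 7, MPI_COMM_WORLD, &req[nreq++]);
    }
    for (int i = 0; i < size; i++)
        MPI_Isend(sbuf, rank + 1, MPI_INT, i, 7, MPI_COMM_WORLD, &req[nreq++]);

    MPI_Waitall(nreq, req, MPI_STATUSES_IGNORE);
    printf("rank %d received %d ints from rank %d\n", rank, recvcount[size - 1], size - 1);

    for (int i = 0; i < size; i++) free(rbuf[i]);
    free(rbuf); free(sbuf); free(sendcount); free(recvcount); free(req);
    MPI_Finalize();
    return 0;
}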
Exemple #17
0
/*! \brief Send the PME mesh force, virial and energy to the PP-only ranks. */
static void gmx_pme_send_force_vir_ener(gmx_pme_pp *pme_pp,
                                        const rvec *f,
                                        matrix vir_q, real energy_q,
                                        matrix vir_lj, real energy_lj,
                                        real dvdlambda_q, real dvdlambda_lj,
                                        float cycles)
{
#if GMX_MPI
    gmx_pme_comm_vir_ene_t cve;
    int                    messages, ind_start, ind_end;
    cve.cycles = cycles;

    /* Now the evaluated forces have to be transferred to the PP nodes */
    messages = 0;
    ind_end  = 0;
    for (const auto &receiver : pme_pp->ppRanks)
    {
        ind_start = ind_end;
        ind_end   = ind_start + receiver.numAtoms;
        if (MPI_Isend(const_cast<void *>(static_cast<const void *>(f[ind_start])),
                      (ind_end-ind_start)*sizeof(rvec), MPI_BYTE,
                      receiver.rankId, 0,
                      pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]) != 0)
        {
            gmx_comm("MPI_Isend failed in do_pmeonly");
        }
    }

    /* send virial and energy to our last PP node */
    copy_mat(vir_q, cve.vir_q);
    copy_mat(vir_lj, cve.vir_lj);
    cve.energy_q     = energy_q;
    cve.energy_lj    = energy_lj;
    cve.dvdlambda_q  = dvdlambda_q;
    cve.dvdlambda_lj = dvdlambda_lj;
    /* check for the signals to send back to a PP node */
    cve.stop_cond = gmx_get_stop_condition();

    cve.cycles = cycles;

    if (debug)
    {
        fprintf(debug, "PME rank sending to PP rank %d: virial and energy\n",
                pme_pp->peerRankId);
    }
    MPI_Isend(&cve, sizeof(cve), MPI_BYTE,
              pme_pp->peerRankId, 1,
              pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);

    /* Wait for the forces to arrive */
    MPI_Waitall(messages, pme_pp->req.data(), pme_pp->stat.data());
#else
    gmx_call("MPI not enabled");
    GMX_UNUSED_VALUE(pme_pp);
    GMX_UNUSED_VALUE(f);
    GMX_UNUSED_VALUE(vir_q);
    GMX_UNUSED_VALUE(energy_q);
    GMX_UNUSED_VALUE(vir_lj);
    GMX_UNUSED_VALUE(energy_lj);
    GMX_UNUSED_VALUE(dvdlambda_q);
    GMX_UNUSED_VALUE(dvdlambda_lj);
    GMX_UNUSED_VALUE(cycles);
#endif
}
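
On the send side, the loop above slices one contiguous force array into per-receiver chunks with MPI_Isend at running offsets and completes everything with a single MPI_Waitall. A minimal stand-alone sketch of that pattern follows; the chunk size and data values are invented and this is not the GROMACS data layout.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    const int chunk = 3;                   /* illustrative: 3 doubles per receiver */

    if (rank == 0) {
        int nrecv = size - 1;
        double *f = malloc((size_t)nrecv * chunk * sizeof(double));
        MPI_Request *req = malloc((size_t)(nrecv > 0 ? nrecv : 1) * sizeof(MPI_Request));
        for (int i = 0; i < nrecv * chunk; i++)
            f[i] = i;

        int ind_end = 0, messages = 0;
        for (int dest = 1; dest < size; dest++) {
            int ind_start = ind_end;       /* running offset into the big array */
            ind_end = ind_start + chunk;
            MPI_Isend(f + ind_start, ind_end - ind_start, MPI_DOUBLE,
                      dest, 0, MPI_COMM_WORLD, &req[messages++]);
        }
        MPI_Waitall(messages, req, MPI_STATUSES_IGNORE);
        free(f); free(req);
    } else {
        double mine[3];
        MPI_Recv(mine, chunk, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("rank %d got %.0f %.0f %.0f\n", rank, mine[0], mine[1], mine[2]);
    }

    MPI_Finalize();
    return 0;
}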
Exemple #18
0
void parallelComm::recvPacketsV(void)
{
  MPI_Waitall(nwait,reqs.data(),stats.data());
}
Exemple #19
0
void _amps_wait_exchange(amps_Handle handle)
{
  int i;
  int num;

  num = handle -> package -> num_send + handle -> package -> num_recv;

  if(num)
  {
     if(handle -> package -> num_recv) 
     {
	for(i = 0; i <  handle -> package -> num_recv; i++)
	{
	   AMPS_CLEAR_INVOICE(handle -> package -> recv_invoices[i]);
	}
     }
	
     MPI_Waitall(num, handle -> package -> recv_requests, 
		 handle -> package -> status);
  }

#ifdef AMPS_MPI_PACKAGE_LOWSTORAGE

  /* Needed by the DEC's; need better memory allocation strategy */
  /* Need to uncommit packages when not in use */
  /* amps_Commit followed by amps_UnCommit ????? */
  if(handle -> package -> commited) 
  {
     for(i = 0; i < handle -> package -> num_recv; i++)
     {
	if( handle -> package -> recv_invoices[i] -> mpi_type != MPI_DATATYPE_NULL )
	{
	   MPI_Type_free(&(handle -> package -> recv_invoices[i] -> mpi_type));   
	}
    
	MPI_Request_free(&(handle -> package -> recv_requests[i]));
     }
    
     for(i = 0; i < handle -> package -> num_send; i++)
     {
	if( handle -> package -> send_invoices[i] -> mpi_type != MPI_DATATYPE_NULL ) 
	{
	   MPI_Type_free(&handle -> package -> send_invoices[i] -> mpi_type);
	}

	MPI_Request_free(&(handle -> package -> send_requests[i]));
     }
    
     if(handle -> package -> recv_requests)
     {
	free(handle -> package -> recv_requests);
	handle -> package -> recv_requests = NULL;
     }
     if(handle -> package -> status) 
     {
	free(handle -> package -> status);
	handle -> package -> status = NULL;
     }
    
     handle -> package -> commited = FALSE;
  }
#endif
}
Exemple #20
0
void parallelComm::sendRecvPacketsCheck(PACKET *sndPack,PACKET *rcvPack)
{
  int i;
  int *scount,*rcount;
  int tag,irnum;
  MPI_Request *request;
  MPI_Status *status;
  //
  scount=(int *)malloc(2*sizeof(int)*nsend);
  rcount=(int *) malloc(2*sizeof(int)*nrecv);
  request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv));
  status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv));
  //
  for(i=0;i<nsend;i++){
    scount[2*i]=sndPack[i].nints;			
    scount[2*i+1]=sndPack[i].nreals;
  }
  //
  irnum=0;
  tag=1;
  //
  for(i=0;i<nrecv;i++)
    MPI_Irecv(&(rcount[2*i]),2,MPI_INT,rcvMap[i],tag,scomm,&request[irnum++]);
  //
  for(i=0;i<nsend;i++)
    MPI_Isend(&(scount[2*i]),2,MPI_INT,sndMap[i],tag,scomm,&request[irnum++]);
  //
  MPI_Waitall(irnum,request,status);

  for(i=0;i<nrecv;i++)
    {
      rcvPack[i].nints=rcount[2*i];
      rcvPack[i].nreals=rcount[2*i+1];
    }

  //for(i=0;i<nsend;i++)
  //  {
  //    printf("%d sending %d to %d\n",myid,sndPack[i].nints,sndMap[i]);
  //  }	
  //for(i=0;i<nrecv;i++)
  //  {
  //   printf("%d receiving %d from %d\n",myid,rcvPack[i].nints,rcvMap[i]);
  //  }
  //
  irnum=0;
  for(i=0;i<nrecv;i++)
    {
      if (rcvPack[i].nints > 0) {
	tag=1;
	rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints);
	MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints,
		  MPI_INT,rcvMap[i],
		  tag,scomm,&request[irnum++]);
      }
      if (rcvPack[i].nreals > 0 ) {
	tag=2;
	rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals);
	MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals,
		  MPI_DOUBLE,rcvMap[i],
		  tag,scomm,&request[irnum++]);
      }
    }
  //
  for(i=0;i<nsend;i++)
    {
      if (sndPack[i].nints > 0){
	tag=1;
	MPI_Isend(sndPack[i].intData,sndPack[i].nints,
		  MPI_INT,sndMap[i],
		  tag,scomm,&request[irnum++]);
      }
      if (sndPack[i].nreals > 0){
	tag=2;
	MPI_Isend(sndPack[i].realData,sndPack[i].nreals,
		  MPI_DOUBLE,sndMap[i],
		  tag,scomm,&request[irnum++]);
      }
    }
  MPI_Waitall(irnum,request,status);
  //
  free(scount);
  free(rcount);
  free(request);
  free(status);
}
Exemple #21
0
int main(int argc, char **argv) {

  double *x, *y;
  double mySUMx, mySUMy, mySUMxy, mySUMxx, SUMx, SUMy, SUMxy,
         SUMxx, SUMres, res, slope, y_intercept, y_estimate;
  int i,j,n,myid,numprocs,naverage,nremain,mypoints,ishift;
  int new_sleep (int *seconds);
  int std_sleep = 3;
  MPI_Status istatus;
  MPI_Status status[20];
  FILE *infile;
  MPI_Request request[20];

  infile = fopen("xydata", "r");
  if (infile == NULL) printf("error opening file\n");

  MPI_Init(&argc, &argv);
  MPI_Comm_rank (MPI_COMM_WORLD, &myid);
  MPI_Comm_size (MPI_COMM_WORLD, &numprocs);

  /* ----------------------------------------------------------
   * Step 1: Process 0 reads data and sends the value of n
   * ---------------------------------------------------------- */
  if (myid == 0) {
    printf ("Number of processes used: %d\n", numprocs);
    printf ("-------------------------------------\n");
    printf ("The x coordinates on worker processes:\n");
    /* this call is used to achieve a consistent output format */
    new_sleep (&std_sleep);
    fscanf (infile, "%d", &n);
    x = (double *) malloc (n*sizeof(double));
    y = (double *) malloc (n*sizeof(double));
    for (i=0; i<n; i++)
      fscanf (infile, "%lf %lf", &x[i], &y[i]);
    for (i=1; i<numprocs; i++)
      MPI_Isend (&n, 1, MPI_INT, i, 10, MPI_COMM_WORLD, &request[i]);
    MPI_Waitall(numprocs-1, &request[1], status);  
  }
  else {
    MPI_Irecv (&n, 1, MPI_INT, 0, 10, MPI_COMM_WORLD, &request[myid]);
    // Should wait before allocating mem
    MPI_Wait(&request[myid], &istatus);
    x = (double *) malloc (n*sizeof(double));
    y = (double *) malloc (n*sizeof(double));
  }
  /* ---------------------------------------------------------- */
  
  naverage = n/numprocs;
  nremain = n % numprocs;

  /* ----------------------------------------------------------
   * Step 2: Process 0 sends subsets of x and y
   * ---------------------------------------------------------- */

  if (myid == 0) {
    ishift = 0;
    for (i=1; i<numprocs; i++) {
      if (i<nremain) {
        ishift += (naverage+1);
        mypoints = naverage+1;
      } else {
        ishift += naverage;
        mypoints = naverage;
      }
      if (i == nremain)
        ishift++;
      //ishift = i*naverage;
      //mypoints = (i < numprocs -1) ? naverage : naverage + nremain;
      MPI_Isend (&ishift, 1, MPI_INT, i, 1, MPI_COMM_WORLD, &request[0]);
      MPI_Isend (&mypoints, 1, MPI_INT, i, 2, MPI_COMM_WORLD, &request[1]);
      MPI_Isend (&x[ishift], mypoints, MPI_DOUBLE, i, 3, MPI_COMM_WORLD, &request[2]);
      MPI_Isend (&y[ishift], mypoints, MPI_DOUBLE, i, 4, MPI_COMM_WORLD, &request[3]);
      MPI_Waitall(4, request, status);
    }
  }
  else {
    /* ---------------the other processes receive---------------- */
    MPI_Irecv (&ishift, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &request[0]);
    MPI_Irecv (&mypoints, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, &request[1]);
    MPI_Waitall(2, request, status); // ishift & mypoints var dependency
    MPI_Irecv (&x[ishift], mypoints, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD,
	      &request[0]);
    MPI_Irecv (&y[ishift], mypoints, MPI_DOUBLE, 0, 4, MPI_COMM_WORLD,
	      &request[1]);
    MPI_Wait(&request[0], &istatus);
    printf ("id %d: ", myid);
    for (i=0; i<n; i++) printf("%4.2lf ", x[i]);
    printf ("\n");
    MPI_Wait(&request[1], &istatus);
    /* ---------------------------------------------------------- */
  }

  /* ----------------------------------------------------------
   * Step 3: Each process calculates its partial sum
   * ---------------------------------------------------------- */
  mySUMx = 0; mySUMy = 0; mySUMxy = 0; mySUMxx = 0;
  if (myid == 0) {
    ishift = 0;
    mypoints = (nremain>0) ? (naverage+1) : naverage;
  }
  for (j=0; j<mypoints; j++) {
    mySUMx = mySUMx + x[ishift+j];
    mySUMy = mySUMy + y[ishift+j];
    mySUMxy = mySUMxy + x[ishift+j]*y[ishift+j];
    mySUMxx = mySUMxx + x[ishift+j]*x[ishift+j];
  }
  
  /* ----------------------------------------------------------
   * Step 4: Process 0 receives partial sums from the others 
   * ---------------------------------------------------------- */
  if (myid != 0) {
    MPI_Isend (&mySUMx, 1, MPI_DOUBLE, 0, 5, MPI_COMM_WORLD, &request[0]);
    MPI_Isend (&mySUMy, 1, MPI_DOUBLE, 0, 6, MPI_COMM_WORLD, &request[1]);
    MPI_Isend (&mySUMxy,1, MPI_DOUBLE, 0, 7, MPI_COMM_WORLD, &request[2]);
    MPI_Isend (&mySUMxx,1, MPI_DOUBLE, 0, 8, MPI_COMM_WORLD, &request[3]);
    MPI_Waitall(4, request, status);
    // We should do Waitall for the workers (myid!=0) 
    // before MPI_Finalize
  }
  else {
    SUMx = mySUMx; SUMy = mySUMy;
    SUMxy = mySUMxy; SUMxx = mySUMxx;
    for (i=1; i<numprocs; i++) {
      MPI_Irecv (&mySUMx, 1, MPI_DOUBLE, i, 5, MPI_COMM_WORLD, &request[0]);
      MPI_Irecv (&mySUMy, 1, MPI_DOUBLE, i, 6, MPI_COMM_WORLD, &request[1]);
      MPI_Irecv (&mySUMxy,1, MPI_DOUBLE, i, 7, MPI_COMM_WORLD, &request[2]);
      MPI_Irecv (&mySUMxx,1, MPI_DOUBLE, i, 8, MPI_COMM_WORLD, &request[3]);
      MPI_Waitall(4, request, status);

      SUMx = SUMx + mySUMx;
      SUMy = SUMy + mySUMy;
      SUMxy = SUMxy + mySUMxy;
      SUMxx = SUMxx + mySUMxx;
    }
  }

  /* ----------------------------------------------------------
   * Step 5: Process 0 does the final steps
   * ---------------------------------------------------------- */
  if (myid == 0) {
    slope = ( SUMx*SUMy - n*SUMxy ) / ( SUMx*SUMx - n*SUMxx );
    y_intercept = ( SUMy - slope*SUMx ) / n;
    /* this call is used to achieve a consistent output format */
    new_sleep (&std_sleep);
    printf ("\n");
    printf ("The linear equation that best fits the given data:\n");
    printf ("       y = %6.2lfx + %6.2lf\n", slope, y_intercept);
    printf ("--------------------------------------------------\n");
    printf ("   Original (x,y)     Estimated y     Residual\n");
    printf ("--------------------------------------------------\n");
    
    SUMres = 0;
    for (i=0; i<n; i++) {
      y_estimate = slope*x[i] + y_intercept;
      res = y[i] - y_estimate;
      SUMres = SUMres + res*res;
      printf ("   (%6.2lf %6.2lf)      %6.2lf       %6.2lf\n", 
	      x[i], y[i], y_estimate, res);
    }
    printf("--------------------------------------------------\n");
    printf("Residual sum = %6.2lf\n", SUMres);
  }

  /* ----------------------------------------------------------	*/
  MPI_Finalize();
}
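
For reference, the Step 5 arithmetic is the closed-form least-squares fit: slope = (SUMx*SUMy - n*SUMxy) / (SUMx*SUMx - n*SUMxx) and y_intercept = (SUMy - slope*SUMx) / n. A small serial sketch of the same formulas, with illustrative sample data, is below.

#include <stdio.h>

/* Serial reference for the Step 5 arithmetic: compute the four sums, then
 * apply the same closed-form slope/intercept formulas as the program above. */
static void fit_line(const double *x, const double *y, int n,
                     double *slope, double *intercept)
{
    double SUMx = 0, SUMy = 0, SUMxy = 0, SUMxx = 0;
    for (int i = 0; i < n; i++) {
        SUMx  += x[i];
        SUMy  += y[i];
        SUMxy += x[i] * y[i];
        SUMxx += x[i] * x[i];
    }
    *slope     = (SUMx * SUMy - n * SUMxy) / (SUMx * SUMx - n * SUMxx);
    *intercept = (SUMy - *slope * SUMx) / n;
}

int main(void)
{
    double x[] = {0, 1, 2, 3}, y[] = {1, 3, 5, 7};    /* sample data: y = 2x + 1 */
    double slope, intercept;
    fit_line(x, y, 4, &slope, &intercept);
    printf("y = %.2fx + %.2f\n", slope, intercept);   /* expect 2.00x + 1.00 */
    return 0;
}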
Exemple #22
0
void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack)
{
  int i;
  int *scount,*rcount;
  int tag,irnum;
  MPI_Request *request;
  MPI_Status *status;
  //
  scount=(int *)malloc(2*sizeof(int)*nsend);
  rcount=(int *) malloc(2*sizeof(int)*nrecv);
  request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv));
  status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv));
  //
  for(i=0;i<nsend;i++){
    scount[2*i]=sndPack[i].nints;			
    scount[2*i+1]=sndPack[i].nreals;
  }
  //
  irnum=0;
  tag=10;
  //
  for(i=0;i<nrecv;i++)
    MPI_Irecv(&(rcount[2*i]),2,MPI_INT,rcvMap[i],tag,scomm,&request[irnum++]);
  //
  for(i=0;i<nsend;i++)
    MPI_Isend(&(scount[2*i]),2,MPI_INT,sndMap[i],tag,scomm,&request[irnum++]);
  //
  MPI_Waitall(irnum,request,status);
  for(i=0;i<nrecv;i++)
    {
      rcvPack[i].nints=rcount[2*i];
      rcvPack[i].nreals=rcount[2*i+1];
    }
  //
  irnum=0;
  for(i=0;i<nrecv;i++)
    {
      if (rcvPack[i].nints > 0) {
  tag=10;
	rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints);
	MPI_Irecv(rcvPack[i].intData,rcvPack[i].nints,
		  MPI_INT,rcvMap[i],
		  tag,scomm,&request[irnum++]);
      }
      if (rcvPack[i].nreals > 0) {
  tag=20;
	rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals);
	MPI_Irecv(rcvPack[i].realData,rcvPack[i].nreals,
		  MPI_DOUBLE,rcvMap[i],
		  tag,scomm,&request[irnum++]);
      }
    }
  //
  for(i=0;i<nsend;i++)
    {
      if (sndPack[i].nints > 0){
  tag=10;
	MPI_Isend(sndPack[i].intData,sndPack[i].nints,
		  MPI_INT,sndMap[i],
		  tag,scomm,&request[irnum++]);
      }
      if (sndPack[i].nreals > 0){
  tag=20;
	MPI_Isend(sndPack[i].realData,sndPack[i].nreals,
		  MPI_DOUBLE,sndMap[i],
		  tag,scomm,&request[irnum++]);
      }
    }
  MPI_Pcontrol(1, "tioga_pc_waitall");
  MPI_Waitall(irnum,request,status);
  MPI_Pcontrol(-1, "tioga_pc_waitall");
  //
  free(scount);
  free(rcount);
  free(request);
  free(status);
}
Exemple #23
0
int main(int argc, char **argv)
{
    int *buf, i, rank, nints, len;
    char *filename, *tmp;
    int errs = 0, toterrs;
    MPI_File fh;
    MPI_Status status[NR_NBOPS];
    MPI_Request request[NR_NBOPS];
    int errcode = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

/* process 0 takes the file name as a command-line argument and
   broadcasts it to other processes */
    if (!rank) {
        i = 1;
        while ((i < argc) && strcmp("-fname", *argv)) {
            i++;
            argv++;
        }
        if (i >= argc) {
            fprintf(stderr, "\n*#  Usage: async -fname filename\n\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        argv++;
        len = strlen(*argv);
        filename = (char *) malloc(len + 10);
        strcpy(filename, *argv);
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(filename, len + 10, MPI_CHAR, 0, MPI_COMM_WORLD);
    } else {
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        filename = (char *) malloc(len + 10);
        MPI_Bcast(filename, len + 10, MPI_CHAR, 0, MPI_COMM_WORLD);
    }


    buf = (int *) malloc(SIZE);
    nints = SIZE / sizeof(int);
    for (i = 0; i < nints; i++)
        buf[i] = rank * 100000 + i;

    /* each process opens a separate file called filename.'myrank' */
    tmp = (char *) malloc(len + 10);
    strcpy(tmp, filename);
    sprintf(filename, "%s.%d", tmp, rank);

    errcode = MPI_File_open(MPI_COMM_SELF, filename,
                            MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_open");
    }
    errcode = MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_set_view");
    }
    for (i = 0; i < NR_NBOPS; i++) {
        errcode = MPI_File_iwrite_at(fh, nints / NR_NBOPS * i,
                                     buf + (nints / NR_NBOPS * i), nints / NR_NBOPS, MPI_INT,
                                     &(request[i]));
        if (errcode != MPI_SUCCESS) {
            handle_error(errcode, "MPI_File_iwrite");
        }
    }
    MPI_Waitall(NR_NBOPS, request, status);

    MPI_File_close(&fh);

    /* reopen the file and read the data back */

    for (i = 0; i < nints; i++)
        buf[i] = 0;
    errcode = MPI_File_open(MPI_COMM_SELF, filename,
                            MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_open");
    }

    errcode = MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_set_view");
    }
    for (i = 0; i < NR_NBOPS; i++) {
        errcode = MPI_File_iread_at(fh, nints / NR_NBOPS * i,
                                    buf + (nints / NR_NBOPS * i), nints / NR_NBOPS, MPI_INT,
                                    &(request[i]));
        if (errcode != MPI_SUCCESS) {
            handle_error(errcode, "MPI_File_iread_at");
        }
    }
    MPI_Waitall(NR_NBOPS, request, status);

    MPI_File_close(&fh);

    /* check if the data read is correct */
    for (i = 0; i < nints; i++) {
        if (buf[i] != (rank * 100000 + i)) {
            errs++;
            fprintf(stderr, "Process %d: error, read %d, should be %d\n", rank, buf[i],
                    rank * 100000 + i);
        }
    }

    MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    if (rank == 0) {
        if (toterrs > 0) {
            fprintf(stderr, "Found %d errors\n", toterrs);
        } else {
            fprintf(stdout, " No Errors\n");
        }
    }

    free(buf);
    free(filename);
    free(tmp);

    MPI_Finalize();
    return 0;
}
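
The filename broadcast at the top of this test can be isolated into a minimal sketch: rank 0 broadcasts the string length first so the other ranks can size their buffers, then the characters themselves. The example filename below is illustrative.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char **argv)
{
    int rank, len;
    char *filename;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        const char *name = "testfile";     /* would come from -fname in the test */
        len = (int) strlen(name);
        filename = malloc(len + 1);
        strcpy(filename, name);
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);        /* length first */
        MPI_Bcast(filename, len + 1, MPI_CHAR, 0, MPI_COMM_WORLD);
    } else {
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        filename = malloc(len + 1);                            /* now we can size it */
        MPI_Bcast(filename, len + 1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }

    printf("rank %d has filename \"%s\"\n", rank, filename);
    free(filename);
    MPI_Finalize();
    return 0;
}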
Exemple #24
0
/* Sets error_code to MPI_SUCCESS if successful, or creates an error code
 * in the case of error.
 */
static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
				  ADIOI_Flatlist_node *flat_buf, ADIO_Offset
				  *offset_list, ADIO_Offset *len_list, int *send_size,
				  int *recv_size, ADIO_Offset off, int size,
				  int *count, int *start_pos,
				  int *partial_recv,
				  int *sent_to_proc, int nprocs,
				  int myrank, int
				  buftype_is_contig, int contig_access_count,
				  ADIO_Offset min_st_offset,
				  ADIO_Offset fd_size,
				  ADIO_Offset *fd_start, ADIO_Offset *fd_end,
				  ADIOI_Access *others_req,
				  int *send_buf_idx, int *curr_to_proc,
				  int *done_to_proc, int *hole, int iter,
				  MPI_Aint buftype_extent, int *buf_idx,
				  int *error_code)
{
    int i, j, k, *tmp_len, nprocs_recv, nprocs_send, err;
    char **send_buf = NULL;
    MPI_Request *requests, *send_req;
    MPI_Datatype *recv_types;
    MPI_Status *statuses, status;
    int *srt_len=NULL, sum;
    ADIO_Offset *srt_off=NULL;
    static char myname[] = "ADIOI_W_EXCHANGE_DATA";

/* exchange recv_size info so that each process knows how much to
   send to whom. */

    MPI_Alltoall(recv_size, 1, MPI_INT, send_size, 1, MPI_INT, fd->comm);

    /* create derived datatypes for recv */

    nprocs_recv = 0;
    for (i=0; i<nprocs; i++) if (recv_size[i]) nprocs_recv++;

    recv_types = (MPI_Datatype *)
	ADIOI_Malloc((nprocs_recv+1)*sizeof(MPI_Datatype));
/* +1 to avoid a 0-size malloc */

    tmp_len = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    j = 0;
    for (i=0; i<nprocs; i++) {
	if (recv_size[i]) {
/* take care if the last off-len pair is a partial recv */
	    if (partial_recv[i]) {
		k = start_pos[i] + count[i] - 1;
		tmp_len[i] = others_req[i].lens[k];
		others_req[i].lens[k] = partial_recv[i];
	    }
	    ADIOI_Type_create_hindexed_x(count[i],
		     &(others_req[i].lens[start_pos[i]]),
	             &(others_req[i].mem_ptrs[start_pos[i]]),
			 MPI_BYTE, recv_types+j);
	    /* absolute displacements; use MPI_BOTTOM in recv */
	    MPI_Type_commit(recv_types+j);
	    j++;
	}
    }

    /* To avoid a read-modify-write, check if there are holes in the
       data to be written. For this, merge the (sorted) offset lists
       others_req using a heap-merge. */

    sum = 0;
    for (i=0; i<nprocs; i++) sum += count[i];
    /* valgrind-detected optimization: if there is no work on this process we do
     * not need to search for holes */
    if (sum) {
        srt_off = (ADIO_Offset *) ADIOI_Malloc(sum*sizeof(ADIO_Offset));
        srt_len = (int *) ADIOI_Malloc(sum*sizeof(int));

        ADIOI_Heap_merge(others_req, count, srt_off, srt_len, start_pos,
                         nprocs, nprocs_recv, sum);
    }

/* for partial recvs, restore original lengths */
    for (i=0; i<nprocs; i++)
        if (partial_recv[i]) {
            k = start_pos[i] + count[i] - 1;
            others_req[i].lens[k] = tmp_len[i];
        }
    ADIOI_Free(tmp_len);

    /* check if there are any holes. If yes, must do read-modify-write.
     * holes can be in three places.  'middle' is what you'd expect: the
     * processes are operating on noncontiguous data.  But holes can also show
     * up at the beginning or end of the file domain (see John Bent ROMIO REQ
     * #835). Missing these holes would result in us writing more data than
     * received by everyone else. */

    *hole = 0;
    if (sum) {
        if (off != srt_off[0]) /* hole at the front */
            *hole = 1;
        else { /* coalesce the sorted offset-length pairs */
            for (i=1; i<sum; i++) {
                if (srt_off[i] <= srt_off[0] + srt_len[0]) {
		    /* ok to cast: operating on cb_buffer_size chunks */
		    int new_len = (int)srt_off[i] + srt_len[i] - (int)srt_off[0];
		    if (new_len > srt_len[0]) srt_len[0] = new_len;
		}
		else
			break;
	    }
            if (i < sum || size != srt_len[0]) /* hole in middle or end */
                *hole = 1;
	}

        ADIOI_Free(srt_off);
        ADIOI_Free(srt_len);
    }

    if (nprocs_recv) {
	if (*hole) {
	    ADIO_ReadContig(fd, write_buf, size, MPI_BYTE,
			    ADIO_EXPLICIT_OFFSET, off, &status, &err);
	    /* --BEGIN ERROR HANDLING-- */
	    if (err != MPI_SUCCESS) {
		*error_code = MPIO_Err_create_code(err,
						   MPIR_ERR_RECOVERABLE, myname,
						   __LINE__, MPI_ERR_IO,
						   "**ioRMWrdwr", 0);
		return;
	    }
	    /* --END ERROR HANDLING-- */
	}
    }

    nprocs_send = 0;
    for (i=0; i < nprocs; i++) if (send_size[i]) nprocs_send++;

    if (fd->atomicity) {
        /* bug fix from Wei-keng Liao and Kenin Coloma */
        requests = (MPI_Request *)
	    ADIOI_Malloc((nprocs_send+1)*sizeof(MPI_Request));
        send_req = requests;
    }
    else {
        requests = (MPI_Request *)
            ADIOI_Malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request));
        /* +1 to avoid a 0-size malloc */

        /* post receives */
        j = 0;
        for (i=0; i<nprocs; i++) {
            if (recv_size[i]) {
                MPI_Irecv(MPI_BOTTOM, 1, recv_types[j], i, myrank+i+100*iter,
                          fd->comm, requests+j);
                j++;
            }
        }
	send_req = requests + nprocs_recv;
    }

/* post sends. if buftype_is_contig, data can be directly sent from
   user buf at location given by buf_idx. else use send_buf. */

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5032, 0, NULL);
#endif
    if (buftype_is_contig) {
	j = 0;
	for (i=0; i < nprocs; i++)
	    if (send_size[i]) {
		MPI_Isend(((char *) buf) + buf_idx[i], send_size[i],
  		            MPI_BYTE, i,  myrank+i+100*iter, fd->comm,
                                  send_req+j);
		j++;
                buf_idx[i] += send_size[i];
	    }
    }
    else if (nprocs_send) {
	/* buftype is not contig */
	send_buf = (char **) ADIOI_Malloc(nprocs*sizeof(char*));
	for (i=0; i < nprocs; i++)
	    if (send_size[i])
		send_buf[i] = (char *) ADIOI_Malloc(send_size[i]);

	ADIOI_Fill_send_buffer(fd, buf, flat_buf, send_buf,
                           offset_list, len_list, send_size,
			   send_req,
                           sent_to_proc, nprocs, myrank,
                           contig_access_count,
                           min_st_offset, fd_size, fd_start, fd_end,
                           send_buf_idx, curr_to_proc, done_to_proc, iter,
                           buftype_extent);
        /* the send is done in ADIOI_Fill_send_buffer */
    }

    if (fd->atomicity) {
        /* bug fix from Wei-keng Liao and Kenin Coloma */
        j = 0;
        for (i=0; i<nprocs; i++) {
            MPI_Status wkl_status;
	    if (recv_size[i]) {
	        MPI_Recv(MPI_BOTTOM, 1, recv_types[j], i, myrank+i+100*iter,
		          fd->comm, &wkl_status);
	        j++;
	    }
        }
    }

    for (i=0; i<nprocs_recv; i++) MPI_Type_free(recv_types+i);
    ADIOI_Free(recv_types);

    if (fd->atomicity) {
        /* bug fix from Wei-keng Liao and Kenin Coloma */
        statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+1) * \
                                         sizeof(MPI_Status));
         /* +1 to avoid a 0-size malloc */
    }
    else {
        statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+nprocs_recv+1) * \
                                     sizeof(MPI_Status));
        /* +1 to avoid a 0-size malloc */
    }

#ifdef NEEDS_MPI_TEST
    i = 0;
    if (fd->atomicity) {
        /* bug fix from Wei-keng Liao and Kenin Coloma */
        while (!i) MPI_Testall(nprocs_send, send_req, &i, statuses);
    }
    else {
        while (!i) MPI_Testall(nprocs_send+nprocs_recv, requests, &i, statuses);
    }
#else
    if (fd->atomicity)
        /* bug fix from Wei-keng Liao and Kenin Coloma */
        MPI_Waitall(nprocs_send, send_req, statuses);
    else
        MPI_Waitall(nprocs_send+nprocs_recv, requests, statuses);
#endif

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5033, 0, NULL);
#endif
    ADIOI_Free(statuses);
    ADIOI_Free(requests);
    if (!buftype_is_contig && nprocs_send) {
	for (i=0; i < nprocs; i++)
	    if (send_size[i]) ADIOI_Free(send_buf[i]);
	ADIOI_Free(send_buf);
    }
}
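
The hole check in the middle of ADIOI_W_Exchange_data can be read in isolation: coalesce the sorted offset/length pairs from the front and report a hole if they fail to cover [off, off+size) exactly. A stand-alone sketch with simplified types (long long instead of ADIO_Offset) and illustrative sample intervals:

#include <stdio.h>

/* Returns 1 if the sorted (offset, length) pairs leave a hole anywhere in
 * the file-domain window [off, off+size), mirroring the coalescing loop
 * in ADIOI_W_Exchange_data above. */
static int has_hole(const long long *srt_off, const int *srt_len, int sum,
                    long long off, int size)
{
    if (sum == 0 || off != srt_off[0])
        return 1;                          /* nothing received, or hole at the front */

    long long covered = srt_len[0];        /* length of the coalesced front run */
    int i;
    for (i = 1; i < sum; i++) {
        if (srt_off[i] <= srt_off[0] + covered) {
            long long new_len = srt_off[i] + srt_len[i] - srt_off[0];
            if (new_len > covered)
                covered = new_len;         /* extend the front run */
        } else {
            break;                         /* gap between runs: hole in the middle */
        }
    }
    return (i < sum || covered != size);   /* hole in the middle or at the end */
}

int main(void)
{
    long long off1[] = {100, 150, 200};
    int       len1[] = {50, 60, 40};       /* covers [100,240) with no gap */
    long long off2[] = {100, 180};
    int       len2[] = {50, 60};           /* gap in [150,180) */

    printf("case 1: hole=%d\n", has_hole(off1, len1, 3, 100, 140));  /* 0 */
    printf("case 2: hole=%d\n", has_hole(off2, len2, 2, 100, 140));  /* 1 */
    return 0;
}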
Exemple #25
0
int main(int argc, char *argv[])
{
    MPI_Request r;
    MPI_Status s;
    // int flag;
    int buf[10];
    int rbuf[10];
    int tag = 27;
    int dest = 0;
    int rank, size;

    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &size );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    /* Create a persistent send request */
    // every rank prepares a send to rank 0
    MPI_Send_init( buf, 10, MPI_INT, dest, tag, MPI_COMM_WORLD, &r );

    /* Use that request */
    if (rank == 0) {
	// allocate an array of 'size' requests for the irecvs
	MPI_Request *rr = (MPI_Request *)malloc(size * sizeof(MPI_Request));
	for (int i=0; i<size; i++) {
	    // rank 0 will receive from every rank
	    MPI_Irecv( rbuf, 10, MPI_INT, i, tag, MPI_COMM_WORLD, &rr[i] );
	}
	// rank 0 will send to rank 0
	MPI_Start( &r );
	// rank 0 sends to rank 0
	MPI_Wait( &r, &s );
	// rank 0 receives from every rank
	MPI_Waitall( size, rr, MPI_STATUSES_IGNORE );
	free(rr);
    }
    else {
	// non-zero ranks will send to rank 0
	MPI_Start( &r );
	// non-zero ranks send to rank 0
	MPI_Wait( &r, &s );
    }

    MPI_Request_free( &r );


    // if (rank == 0)
    // 	{
    // 	    MPI_Request sr;
    // 	    /* Create a persistent receive request */
    // 	    // rank 0 prepares the receive from every rank
    // 	    MPI_Recv_init( rbuf, 10, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &r );
    // 	    // rank 0 will send to rank 0
    // 	    MPI_Isend( buf, 10, MPI_INT, 0, tag, MPI_COMM_WORLD, &sr );
    // 	    for (int i=0; i<size; i++) {
    // 		// rank 0 will receive from every rank
    // 		MPI_Start( &r );
    // 		// rank 0 receives from every rank
    // 		MPI_Wait( &r, &s );
    // 	    }
    // 	    // rank 0 sends to rank 0
    // 	    MPI_Wait( &sr, &s );
    // 	    MPI_Request_free( &r );
    // 	}
    // else {
    // 	// non-zero ranks send to rank 0
    // 	MPI_Send( buf, 10, MPI_INT, 0, tag, MPI_COMM_WORLD );
    // }

    MPI_Finalize();
    return 0;
}
Exemple #26
0
static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
			 *flat_buf, ADIO_Offset *offset_list, ADIO_Offset
                         *len_list, int *send_size, int *recv_size,
			 int *count, int *start_pos, int *partial_send, 
			 int *recd_from_proc, int nprocs, 
			 int myrank, int
			 buftype_is_contig, int contig_access_count,
			 ADIO_Offset min_st_offset, ADIO_Offset fd_size,
			 ADIO_Offset *fd_start, ADIO_Offset *fd_end, 
			 ADIOI_Access *others_req, 
                         int iter, MPI_Aint buftype_extent, int *buf_idx)
{
    int i, j, k=0, tmp=0, nprocs_recv, nprocs_send;
    char **recv_buf = NULL; 
    MPI_Request *requests;
    MPI_Datatype send_type;
    MPI_Status *statuses;

/* exchange send_size info so that each process knows how much to
   receive from whom and how much memory to allocate. */

    MPI_Alltoall(send_size, 1, MPI_INT, recv_size, 1, MPI_INT, fd->comm);

    nprocs_recv = 0;
    for (i=0; i < nprocs; i++) if (recv_size[i]) nprocs_recv++;

    nprocs_send = 0;
    for (i=0; i<nprocs; i++) if (send_size[i]) nprocs_send++;

    requests = (MPI_Request *)
	ADIOI_Malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request));
/* +1 to avoid a 0-size malloc */

/* post recvs. if buftype_is_contig, data can be directly recd. into
   user buf at location given by buf_idx. else use recv_buf. */

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5032, 0, NULL);
#endif

    if (buftype_is_contig) {
	j = 0;
	for (i=0; i < nprocs; i++) 
	    if (recv_size[i]) {
		MPI_Irecv(((char *) buf) + buf_idx[i], recv_size[i], 
		  MPI_BYTE, i, myrank+i+100*iter, fd->comm, requests+j);
		j++;
		buf_idx[i] += recv_size[i];
	    }
    }
    else {
/* allocate memory for recv_buf and post receives */
	recv_buf = (char **) ADIOI_Malloc(nprocs * sizeof(char*));
	for (i=0; i < nprocs; i++) 
	    if (recv_size[i]) recv_buf[i] = 
                                  (char *) ADIOI_Malloc(recv_size[i]);

	    j = 0;
	    for (i=0; i < nprocs; i++) 
		if (recv_size[i]) {
		    MPI_Irecv(recv_buf[i], recv_size[i], MPI_BYTE, i, 
			      myrank+i+100*iter, fd->comm, requests+j);
		    j++;
#ifdef RDCOLL_DEBUG
		    DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n", 
		       myrank, recv_size[i], myrank+i+100*iter); 
#endif
		}
    }

/* create derived datatypes and send data */

    j = 0;
    for (i=0; i<nprocs; i++) {
	if (send_size[i]) {
/* take care if the last off-len pair is a partial send */
	    if (partial_send[i]) {
		k = start_pos[i] + count[i] - 1;
		tmp = others_req[i].lens[k];
		others_req[i].lens[k] = partial_send[i];
	    }
	    ADIOI_Type_create_hindexed_x(count[i],
		  &(others_req[i].lens[start_pos[i]]),
	            &(others_req[i].mem_ptrs[start_pos[i]]), 
			 MPI_BYTE, &send_type);
	    /* absolute displacement; use MPI_BOTTOM in send */
	    MPI_Type_commit(&send_type);
	    MPI_Isend(MPI_BOTTOM, 1, send_type, i, myrank+i+100*iter,
		      fd->comm, requests+nprocs_recv+j);
	    MPI_Type_free(&send_type);
	    if (partial_send[i]) others_req[i].lens[k] = tmp;
	    j++;
	}
    }

    statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send+nprocs_recv+1) * \
                                     sizeof(MPI_Status)); 
     /* +1 to avoid a 0-size malloc */

    /* wait on the receives */
    if (nprocs_recv) {
#ifdef NEEDS_MPI_TEST
	j = 0;
	while (!j) MPI_Testall(nprocs_recv, requests, &j, statuses);
#else
	MPI_Waitall(nprocs_recv, requests, statuses);
#endif

	/* if noncontiguous, do the copies from the recv buffers */
	if (!buftype_is_contig) 
	    ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf,
				   offset_list, len_list, (unsigned*)recv_size, 
				   requests, statuses, recd_from_proc, 
				   nprocs, contig_access_count,
				   min_st_offset, fd_size, fd_start, fd_end,
				   buftype_extent);
    }

    /* wait on the sends*/
    MPI_Waitall(nprocs_send, requests+nprocs_recv, statuses+nprocs_recv);

    ADIOI_Free(statuses);
    ADIOI_Free(requests);

    if (!buftype_is_contig) {
	for (i=0; i < nprocs; i++) 
	    if (recv_size[i]) ADIOI_Free(recv_buf[i]);
	ADIOI_Free(recv_buf);
    }
#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5033, 0, NULL);
#endif
}
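
Both exchange routines rely on hindexed datatypes built from absolute memory addresses so that MPI_BOTTOM can be passed as the buffer. A minimal stand-alone illustration of that trick follows, using the standard MPI_Type_create_hindexed rather than ROMIO's ADIOI_Type_create_hindexed_x wrapper; the two memory regions are illustrative and it needs at least two ranks.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank;
    double a[4], b[2];                     /* two disjoint memory regions */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int      blocklens[2] = {4, 2};
    MPI_Aint displs[2];
    MPI_Get_address(a, &displs[0]);        /* absolute addresses, not offsets */
    MPI_Get_address(b, &displs[1]);

    MPI_Datatype scattered;
    MPI_Type_create_hindexed(2, blocklens, displs, MPI_DOUBLE, &scattered);
    MPI_Type_commit(&scattered);

    if (rank == 0) {
        for (int i = 0; i < 4; i++) a[i] = i;
        for (int i = 0; i < 2; i++) b[i] = 10 + i;
        /* both regions travel in one message, addressed relative to MPI_BOTTOM */
        MPI_Send(MPI_BOTTOM, 1, scattered, 1, 0, MPI_COMM_WORLD);
    } else if (rank == 1) {
        MPI_Recv(MPI_BOTTOM, 1, scattered, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("rank 1 got a[3]=%g b[1]=%g\n", a[3], b[1]);   /* expect 3 and 11 */
    }

    MPI_Type_free(&scattered);
    MPI_Finalize();
    return 0;
}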
Exemple #27
0
int main(int argc, char *argv[])
{

    int myid, numprocs, i, j;
    int size, align_size;
    char *s_buf, *r_buf;
    double t_start = 0.0, t_end = 0.0, t = 0.0;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    align_size = getpagesize();
    s_buf =
        (char *) (((unsigned long) s_buf1 + (align_size - 1)) /
                  align_size * align_size);
    r_buf =
        (char *) (((unsigned long) r_buf1 + (align_size - 1)) /
                  align_size * align_size);

    if (myid == 0) {
        fprintf(stdout,
                "# OSU MPI Bidirectional Bandwidth Test (Version 2.0)\n");
        fprintf(stdout, "# Size\t\tBi-Bandwidth (MB/s) \n");
    }

    for (size = 1; size <= MAX_MSG_SIZE; size *= 2) {

        /* touch the data */
        for (i = 0; i < size; i++) {
            s_buf[i] = 'a';
            r_buf[i] = 'b';
        }

        if (size > large_message_size) {
            loop = loop_large;
            skip = skip_large;
            window_size = window_size_large;
        }

        if (myid == 0) {
            for (i = 0; i < loop + skip; i++) {

                if (i == skip)
                    t_start = MPI_Wtime();
                for (j = 0; j < window_size; j++)
                    MPI_Irecv(r_buf, size, MPI_CHAR, 1, 10,
                              MPI_COMM_WORLD, recv_request + j);
                for (j = 0; j < window_size; j++)
                    MPI_Isend(s_buf, size, MPI_CHAR, 1, 100,
                              MPI_COMM_WORLD, send_request + j);
                MPI_Waitall(window_size, send_request, reqstat);
                MPI_Waitall(window_size, recv_request, reqstat);

            }

            t_end = MPI_Wtime();
            t = t_end - t_start;

        } else if (myid == 1) {

            for (i = 0; i < loop + skip; i++) {

                for (j = 0; j < window_size; j++)
                    MPI_Irecv(r_buf, size, MPI_CHAR, 0, 100,
                              MPI_COMM_WORLD, recv_request + j);
                for (j = 0; j < window_size; j++)
                    MPI_Isend(s_buf, size, MPI_CHAR, 0, 10,
                              MPI_COMM_WORLD, send_request + j);
                MPI_Waitall(window_size, send_request, reqstat);
                MPI_Waitall(window_size, recv_request, reqstat);

            }

        }

        if (myid == 0) {
            double tmp;
            tmp = ((size * 1.0) / 1.0e6) * loop * window_size * 2;
            fprintf(stdout, "%d\t\t%f\n", size, tmp / t);
        }

    }

    MPI_Finalize();
    return 0;

}
Exemple #28
0
int main(int argc, char **argv) {

  int    	size=-1,rank=-1, left=-1, right=-1, you=-1;
  int           ndata=127,ndata_max=127,seed;
  int           rv, nsec=0, count, cmpl;
  long long int i,j,k;
  unsigned long long int  nflop=0,nmem=1,nsleep=0,nrep=1, myflops;
  char 		*env_ptr, cbuf[4096];
  double 	*sbuf, *rbuf,*x;
  MPI_Status    *s;
  MPI_Request   *r;
  time_t	ts;


   seed = time(&ts);

   flags |= DOMPI;
   while(--argc && argv++) {
  if(!strcmp("-v",*argv)) {
    flags |= DOVERBOSE;
  } else if(!strcmp("-n",*argv)) {
    --argc; argv++;
    nflop = atol(*argv);
  } else if(!strcmp("-N",*argv)) {
    --argc; argv++;
    nrep = atol(*argv);
  } else if(!strcmp("-d",*argv)) {
    --argc; argv++;
    ndata_max = ndata = atol(*argv);
  } else if(!strcmp("-m",*argv)) {
    --argc; argv++;
    nmem = atol(*argv);
  } else if(!strcmp("-w",*argv)) {
    --argc; argv++;
    nsec = atoi(*argv);
  } else if(!strcmp("-s",*argv)) {
    --argc; argv++;
    nsleep = atol(*argv);
  } else if(!strcmp("-spray",*argv)) {
    flags |= DOSPRAY;
  } else if(!strcmp("-c",*argv)) {
    flags |= CORE;
  } else if(!strcmp("-r",*argv)) {
    flags |= REGION;
  } else if(!strcmp("-stair",*argv)) {
    flags |= STAIR_RANK;
  } else if(!strcmp("-stair_region",*argv)) {
    flags |= STAIR_REGION;
  } else if(!strcmp("-nompi",*argv)) {
    flags &= ~DOMPI;
  }
 }
 
 if(flags & DOMPI) {
  MPI_Init(&argc,&argv);

/* test double init 
  MPI_Init(&argc,&argv);
*/
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 }
 
 if(nsec > 0) {
  sleep(nsec);
 }

 if(nmem) {
 nmem = (nmem*1024*1024/sizeof(double));
 x = (double *)malloc((size_t)(nmem*sizeof(double)));
 for(j=0;j<nrep;j++) {
 for(i=0;i<nmem;i++) {
  x[i] = i;
 }
 for(i=0;i<nmem;i++) {
  x[i] = i*x[i];
 }
 if(x[nmem-1]*x[nmem-1] < 0) {
  printf("assumption about flop-test or optimization failed\n");
 }
 }
 if(0) free((char *)x);
}
 
/*
#define LONG_REGNAME rshouldbethelastchar
*/
#define LONG_REGNAME abcdefghijklmnopqrst

 if(flags & REGION) {
  MPI_Pcontrol(0,"enter_region(abcdefghijklmnopqrst)");
  sprintf(cbuf,"");
  MPI_Pcontrol(0,"get_region()",cbuf);
  if(strcmp(cbuf,"abcdefghijklmnopqrst")) {
   printf("%d in region = \"%s\" not \"%s\"\n",
	 rank,cbuf,"abcdefghijklmnopqrst");
   fflush(stdout);
  }
  MPI_Pcontrol(0,"exit_region(abcdefghijklmnopqrst)");
  MPI_Pcontrol(0,"get_region()",cbuf);
  if(strcmp(cbuf,"ipm_noregion")) {
   printf("%d out region = \"%s\" not \"%s\"\n",
	 rank,cbuf,"ipm_noregion");
   fflush(stdout);
  }
 }
  
  if(flags & REGION && rank > -1 ) MPI_Pcontrol(1,"region_zzzzzzzzzzzZz"); 
 if(nflop) {
  x = (double *)malloc((size_t)(10*sizeof(double)));
  j = k = 0;
  for(i=0;i<10;i++) {
   x[i] = 1.0;
  }
if(flags & STAIR_RANK) { 
 myflops = (rank*nflop)/size;
} else {
 myflops = nflop;
}
  for(i=0;i<nflop;i++) {
   x[j] = x[j]*x[k];
   j = ((i%9)?(j+1):(0));
   k = ((i%8)?(k+1):(0));
  }
  free((char *)x);
 }

 if(nsleep) {
  sleep(nsleep);
 }
  if(flags & REGION && rank > -1 ) MPI_Pcontrol(-1,"region_zzzzzzzzzzzZz"); 
 
 if(nmem<nflop) nmem=nflop;
 
 if(nflop>1) printf("FLOPS = %lld BYTES = %lld\n", nflop, nmem);
 
 fflush(stdout);
 
 if(flags & CORE) {
  for(i=0;;i++) {
   x[i] = x[i*i-1000];
  }
 }

 if(flags & DOMPI) {
  s = (MPI_Status *)malloc((size_t)(sizeof(MPI_Status)*2*size));
  r = (MPI_Request *)malloc((size_t)(sizeof(MPI_Request)*2*size));


  sbuf = (double *)malloc((size_t)(ndata_max*sizeof(double)));
  rbuf = (double *)malloc((size_t)(ndata_max*sizeof(double)));
  for(i=0;i<ndata_max;i++) { sbuf[i] = rbuf[i] = i; }

  MPI_Bcast(&seed,1,MPI_INT,0,MPI_COMM_WORLD);
  srand48(seed);

  for(i=0;i<nrep;i++) {
   MPI_Bcast(sbuf,ndata_max,MPI_DOUBLE,0,MPI_COMM_WORLD);
  }

  if(size>1) {
  if(!rank) {left=size-1;} else { left = rank-1;}
  if(rank == size-1) { right=0;} else {right=rank+1;}
  you =  (rank < size/2)?(rank+size/2):(rank-size/2);
  } else  {
   you = left = right = rank;
  }
 


  for(i=0;i<nrep;i++) {
   if(flags & DOSPRAY) {
    ndata = (long int)(drand48()*ndata_max)+1;
   }
   MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,right,1,rbuf,ndata,MPI_DOUBLE,left,1,MPI_COMM_WORLD,s);
   MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s);
  if(flags & REGION) MPI_Pcontrol(1,"region_a"); 
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s);
  MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,1,MPI_COMM_WORLD,s);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);

  MPI_Isend(sbuf,ndata/2,MPI_DOUBLE,you,0,MPI_COMM_WORLD, r);
  MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &cmpl, s);
  MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, s);
  MPI_Get_count(s,MPI_DOUBLE,&count);
  MPI_Recv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD, s);
  if(count != ndata/2) {
  printf("error: MPI_Get_count(s,MPI_DOUBLE,&count) --> count = %d\n",count);
  }
  MPI_Wait(r,s);
/* FIXME - the following case may need to be addressed
  MPI_Test(r,&cmpl,s);
  printf("spam1 %d %d\n", s->MPI_SOURCE, cmpl);
  if(r != MPI_REQUEST_NULL) {
   MPI_Wait(r,s);
   printf("spam2 %d\n", s->MPI_SOURCE);
  }
*/

  MPI_Irecv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD,r);
  MPI_Send(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD);
  MPI_Wait(r,s);

  for(j=0;j<size;j++) {
   MPI_Isend(sbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD, r+j);
   MPI_Irecv(rbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD,r+size+j);
  }
  MPI_Waitall(2*size,r,s);
/*
  for(j=0;j<size;j++) {
   printf("rep %d stat %d %d %d\n",i, j, s[j].MPI_SOURCE, s[j+size].MPI_SOURCE);
  }
*/

  if(flags & REGION) MPI_Pcontrol(-1,"region_a"); 

  if(flags & REGION) MPI_Pcontrol(1,"region_b"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_b"); 

 if(1) {
  if(flags & REGION) MPI_Pcontrol(1,"region_c"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_c"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_d"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_d"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_e"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_e"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_f"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_f"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_g"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_g"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_h"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_h"); 
  if(flags & REGION) MPI_Pcontrol(1,"region_i"); 
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  if(flags & REGION) MPI_Pcontrol(-1,"region_i"); 
 }


  }


  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Finalize();
  }

  free((char *)rbuf);
  free((char *)sbuf);
  free((char *)r);
  free((char *)s);

  free((char *)x);

  return 0;   
}
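
The FIXME in the example above asks how an MPI_Test followed by a conditional MPI_Wait should behave. A minimal sketch of that pattern under standard MPI semantics: on completion MPI_Test sets the request to MPI_REQUEST_NULL and fills the status, so the guard only falls through to MPI_Wait when the request is still pending. It needs at least two ranks; the payload value is illustrative.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, data = 0, done = 0;
    MPI_Request r = MPI_REQUEST_NULL;
    MPI_Status  s;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        MPI_Irecv(&data, 1, MPI_INT, 1, 0, MPI_COMM_WORLD, &r);
        MPI_Test(&r, &done, &s);           /* may or may not have completed yet */
        if (r != MPI_REQUEST_NULL)         /* still pending: block for it */
            MPI_Wait(&r, &s);
        printf("rank 0 received %d from rank %d\n", data, s.MPI_SOURCE);
    } else if (rank == 1) {
        data = 42;
        MPI_Send(&data, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}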
Exemple #29
0
int main(int argc, char **argv) {
    int my_rank;
    float * p1;
    float * p2;
    float * ptemp;
    int i, j;
    MPI_Status  stats[2*NUM_SLICES+2];
    MPI_Request requests[2*NUM_SLICES+2];
    int ierr;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /*DETERMINE RANK OF THIS PROCESSOR*/
    printf("Hello Paul %d\n", my_rank);

    if (my_rank == 0)
    {
        init_signal(un);
        p1 = un;    
        for (j = 0; j < SLICE_WIDTH; j++)
        {
            local_data[j] = *(un+j);
        }

        // send starting data to other processes
        for (i= 1; i < NUM_SLICES; i++)
        {
            MPI_Isend(un + (SLICE_WIDTH-2* OVERLAP) * i, SLICE_WIDTH, MPI_FLOAT, i, 0, MPI_COMM_WORLD, &requests[i]);
        }
        MPI_Waitall(NUM_SLICES-1, &requests[1], &stats[1]);

        for (j=0; j< 8192; j++)
        {
           //printf("process 0 step %d\n", j);

           // update from A to B
           for (i=0; i < OVERLAP/2; i++)
           {
              update_signal(local_data, local_data_1, 0, SLICE_WIDTH);
           }

           // update from B to A
           for (i=0; i < OVERLAP/2; i++)
           {
              update_signal(local_data_1, local_data, 0, SLICE_WIDTH);
           }

           // send/receive overlaps
           MPI_Isend(local_data + (SLICE_WIDTH- 2 * OVERLAP), OVERLAP, MPI_FLOAT, 1, 0, MPI_COMM_WORLD, &requests[0]);
           MPI_Irecv(local_data + (SLICE_WIDTH -   OVERLAP), OVERLAP, MPI_FLOAT, 1, 1, MPI_COMM_WORLD, &requests[1]);
           MPI_Waitall(2, &requests[0], &stats[0]);
        }

        // collect the final slices back from the other processes
        for (i= 1; i < NUM_SLICES; i++)
        {
            MPI_Irecv(un_1 + (SLICE_WIDTH-2* OVERLAP) * i, SLICE_WIDTH, MPI_FLOAT, i, 0, MPI_COMM_WORLD, &requests[i]);
        }
        MPI_Waitall(NUM_SLICES-1, &requests[1], &stats[1]);

        for (j = 0; j< SLICE_WIDTH; j++)
        {
             *(un_1 +j) = local_data[j];
        }
        display_signal(un_1);
    }
    else
    {
        // worker ranks: receive the initial slice from rank 0
        MPI_Irecv(local_data, SLICE_WIDTH, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &requests[0]);
        MPI_Wait(&requests[0], &stats[0]);

       for (j=0; j< 8192; j++)
       {
         //printf("process  step %d\n", j);
        for (i=0; i < OVERLAP/2; i++)
        {
            update_signal(local_data, local_data_1, 0, SLICE_WIDTH);
        }

        for (i=0; i < OVERLAP/2; i++)
        {
            update_signal(local_data_1, local_data, 0, SLICE_WIDTH);
        }
        // last slice: exchange ghost cells with the left neighbour only
        if (my_rank == (NUM_SLICES-1))
        {
            MPI_Isend(local_data + OVERLAP, OVERLAP, MPI_FLOAT, my_rank-1, 2*my_rank -1, MPI_COMM_WORLD, &requests[0]);
            MPI_Irecv(local_data, OVERLAP, MPI_FLOAT, my_rank-1, 2* my_rank-2, MPI_COMM_WORLD, &requests[1]);
            MPI_Waitall(2, &requests[0], &stats[0]);
        }
        else
        {
            // interior slice: exchange ghost cells with both neighbours
            MPI_Isend(local_data + OVERLAP, OVERLAP, MPI_FLOAT, my_rank-1, 2*my_rank-1, MPI_COMM_WORLD, &requests[0]);
            MPI_Irecv(local_data, OVERLAP, MPI_FLOAT, my_rank-1, 2*my_rank-2, MPI_COMM_WORLD, &requests[1]);
            MPI_Irecv(local_data + SLICE_WIDTH - OVERLAP, OVERLAP, MPI_FLOAT, my_rank+1, 2*my_rank+1, MPI_COMM_WORLD, &requests[2]);
            MPI_Isend(local_data + SLICE_WIDTH - 2 *OVERLAP, OVERLAP, MPI_FLOAT, my_rank+1, 2*my_rank, MPI_COMM_WORLD, &requests[3]);
            MPI_Waitall(4, &requests[0], &stats[0]);

        }
       }
        // send the final slice back to rank 0
        MPI_Isend(local_data, SLICE_WIDTH, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &requests[0]);
        MPI_Wait(&requests[0], &stats[0]);
    }

    char processor_name[128];
    int name_len;
    MPI_Get_processor_name(processor_name, &name_len);

    // Print off a hello world message
    printf("Hello world from processor %s\n",
           processor_name);

    MPI_Finalize();
    return 0;
}
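Each slice in the example above exchanges OVERLAP ghost cells with its neighbours through Isend/Irecv pairs completed by MPI_Waitall. The sketch below shows the same halo exchange written with MPI_Sendrecv and MPI_PROC_NULL at the domain edges; WIDTH, OVERLAP and the buffer layout (OVERLAP-wide ghost zones at both ends of each slice) are illustrative assumptions, not values taken from the code above.

#include <mpi.h>
#include <stdio.h>

#define WIDTH   64   /* illustrative slice width (interior plus two ghost zones) */
#define OVERLAP  4   /* illustrative ghost-zone width */

/* Sketch only: swap OVERLAP boundary cells with the left and right
   neighbour; MPI_PROC_NULL turns the edge exchanges into no-ops. */
int main(int argc, char **argv)
{
  float slice[WIDTH];
  int rank, size;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  for (int i = 0; i < WIDTH; i++) slice[i] = (float)rank;

  int left  = (rank > 0)        ? rank - 1 : MPI_PROC_NULL;
  int right = (rank < size - 1) ? rank + 1 : MPI_PROC_NULL;

  /* send my rightmost interior cells to the right neighbour,
     receive the left neighbour's cells into my left ghost zone */
  MPI_Sendrecv(slice + WIDTH - 2*OVERLAP, OVERLAP, MPI_FLOAT, right, 0,
               slice,                     OVERLAP, MPI_FLOAT, left,  0,
               MPI_COMM_WORLD, MPI_STATUS_IGNORE);

  /* send my leftmost interior cells to the left neighbour,
     receive the right neighbour's cells into my right ghost zone */
  MPI_Sendrecv(slice + OVERLAP,           OVERLAP, MPI_FLOAT, left,  1,
               slice + WIDTH - OVERLAP,   OVERLAP, MPI_FLOAT, right, 1,
               MPI_COMM_WORLD, MPI_STATUS_IGNORE);

  printf("rank %d: left ghost = %f, right ghost = %f\n",
         rank, slice[0], slice[WIDTH - 1]);
  MPI_Finalize();
  return 0;
}

MPI_Sendrecv pairs each send with a receive internally, so this variant needs no request array and cannot deadlock on the boundary exchange; the nonblocking version above trades that simplicity for the chance to overlap the two directions.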
void xchange_lexicfield32(spinor32 * const l) {

  MPI_Request requests[16];
  MPI_Status status[16];
#  ifdef PARALLELT
  int reqcount = 4;
#  elif defined PARALLELXT
  int reqcount = 8;
#  elif defined PARALLELXYT
  int reqcount = 12;
#  elif defined PARALLELXYZT
  int reqcount = 16;
#  endif
#ifdef _KOJAK_INST
#pragma pomp inst begin(xchange_lexicfield32)
#endif
#  if (defined BGL && defined XLC)
  __alignx(16, l);
#  endif

#  ifdef TM_USE_MPI


  /* send the data to the neighbour on the left */
  /* receive the data from the neighbour on the right */
  MPI_Isend((void*)l, 1, lfield_time_slice_cont32, g_nb_t_dn, 5081, g_cart_grid, &requests[0]);
  MPI_Irecv((void*)(l+VOLUME), 1, lfield_time_slice_cont32, g_nb_t_up, 5081, g_cart_grid, &requests[1]);
#    if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
  /* send the data to the neighbour on the left in x direction */
  /* receive the data from the neighbour on the right in x direction */
  MPI_Isend((void*)l, 1, lfield_x_slice_gath32, g_nb_x_dn, 5091, g_cart_grid,  &requests[4]);
  MPI_Irecv((void*)(l+(T+2)*LX*LY*LZ), 1, lfield_x_slice_cont32, g_nb_x_up, 5091, g_cart_grid, &requests[5]);

#    endif
  
#    if (defined PARALLELXYT || defined PARALLELXYZT)
  /* send the data to the neighbour on the left in y direction */
  /* receive the data from the neighbour on the right in y direction */
  MPI_Isend((void*)l, 1, lfield_y_slice_gath32, g_nb_y_dn, 5101, g_cart_grid, &requests[8]);
  MPI_Irecv((void*)(l + VOLUME + 2*LZ*(LX*LY + T*LY)), 1, lfield_y_slice_cont32, g_nb_y_up, 5101, g_cart_grid, &requests[9]);
#    endif
  
#    if (defined PARALLELXYZT)
  
  /* send the data to the neighbour on the left in z direction */
  /* receive the data from the neighbour on the right in z direction */
  MPI_Isend((void*)l, 1, lfield_z_slice_gath32, g_nb_z_dn, 5503, g_cart_grid, &requests[12]);
  MPI_Irecv((void*)(l+VOLUME + 2*LZ*(LX*LY + T*LY) + 2*LZ*T*LX), 1, lfield_z_slice_cont32, g_nb_z_up, 5503, g_cart_grid, &requests[13]); 
#    endif
  /* send the data to the neighbour on the right */
  /* receive the data from the neighbour on the left */
  MPI_Isend((void*)(l+(T-1)*LX*LY*LZ), 1, lfield_time_slice_cont32, g_nb_t_up, 5082, g_cart_grid, &requests[2]);
  MPI_Irecv((void*)(l+(T+1)*LX*LY*LZ), 1, lfield_time_slice_cont32, g_nb_t_dn, 5082, g_cart_grid, &requests[3]);
  
#    if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
  /* send the data to the neighbour on the right in x direction */
  /* receive the data from the neighbour on the left in x direction */  
  MPI_Isend((void*)(l+(LX-1)*LY*LZ), 1, lfield_x_slice_gath32, g_nb_x_up, 5092, g_cart_grid, &requests[6]);
  MPI_Irecv((void*)(l+((T+2)*LX*LY*LZ + T*LY*LZ)), 1, lfield_x_slice_cont32, g_nb_x_dn, 5092, g_cart_grid, &requests[7]);
#    endif
  
#    if (defined PARALLELXYT || defined PARALLELXYZT)
  /* send the data to the neighbour on the right in y direction */
  /* receive the data from the neighbour on the left in y direction */  
  MPI_Isend((void*)(l+(LY-1)*LZ), 1, lfield_y_slice_gath32, g_nb_y_up, 5102, g_cart_grid, &requests[10]);
  MPI_Irecv((void*)(l+VOLUME + 2*LZ*(LX*LY + T*LY) + T*LX*LZ), 1, lfield_y_slice_cont32, g_nb_y_dn, 5102, g_cart_grid, &requests[11]);
#    endif
  
#    if defined PARALLELXYZT
  
  /* send the data to the neighbour on the right in z direction */
  /* receive the data from the neighbour on the left in z direction */  
  MPI_Isend((void*)(l+LZ-1), 1, lfield_z_slice_gath32, g_nb_z_up, 5504, g_cart_grid, &requests[14]);
  MPI_Irecv((void*)(l+VOLUME + 2*LZ*(LX*LY + T*LY) + 2*T*LX*LZ + T*LX*LY), 1, lfield_z_slice_cont32, g_nb_z_dn, 5504, g_cart_grid, &requests[15]); 
#    endif
  
  MPI_Waitall(reqcount, requests, status);

#  endif
  return;
#ifdef _KOJAK_INST
#pragma pomp inst end(xchange_lexicfield32)
#endif
}
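xchange_lexicfield32 moves whole boundary slices with a count of 1 by relying on MPI derived datatypes (the lfield_*_slice_gath32 / lfield_*_slice_cont32 types), which are presumably created and committed elsewhere in the code base. The sketch below shows, under that assumption, how such a pairing can be built: a strided MPI_Type_vector describes the gathered slice on the send side and a matching contiguous type describes the halo buffer on the receive side. The geometry constants and the ring neighbours are purely illustrative.

#include <mpi.h>
#include <stdio.h>

/* Sketch only: NBLOCKS/BLOCKLEN/STRIDE are illustrative, not the lattice
   geometry used above. Both types carry NBLOCKS*BLOCKLEN floats, so their
   type signatures match and a strided send can land in a contiguous halo. */
int main(int argc, char **argv)
{
  enum { NBLOCKS = 8, BLOCKLEN = 4, STRIDE = 16 };
  float field[NBLOCKS * STRIDE];
  float halo[NBLOCKS * BLOCKLEN];
  MPI_Datatype slice_t, halo_t;
  MPI_Request req[2];
  int rank, size, up, dn;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  up = (rank + 1) % size;            /* ring neighbours stand in for g_nb_*_up/dn */
  dn = (rank - 1 + size) % size;

  for (int i = 0; i < NBLOCKS * STRIDE; i++) field[i] = (float)rank;

  /* NBLOCKS blocks of BLOCKLEN floats, one block every STRIDE floats */
  MPI_Type_vector(NBLOCKS, BLOCKLEN, STRIDE, MPI_FLOAT, &slice_t);
  MPI_Type_contiguous(NBLOCKS * BLOCKLEN, MPI_FLOAT, &halo_t);
  MPI_Type_commit(&slice_t);
  MPI_Type_commit(&halo_t);

  /* send a strided slice, receive it into a contiguous halo buffer,
     mirroring the *_gath32 / *_cont32 pairing in the function above */
  MPI_Isend(field, 1, slice_t, dn, 99, MPI_COMM_WORLD, &req[0]);
  MPI_Irecv(halo,  1, halo_t,  up, 99, MPI_COMM_WORLD, &req[1]);
  MPI_Waitall(2, req, MPI_STATUSES_IGNORE);

  printf("rank %d received halo from rank %d: %f\n", rank, up, halo[0]);

  MPI_Type_free(&slice_t);
  MPI_Type_free(&halo_t);
  MPI_Finalize();
  return 0;
}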