Ejemplo n.º 1
0
    /**
     * Test driver for TopoManager.
     *
     * Prints the torus dimensions reported by TopoManager, then for every PE
     * prints the coordinates TopoManager assigns to it next to the coordinates
     * returned by the platform-specific interface (only when one of the
     * platform macros is enabled), prints a contiguity metric and exits.
     *
     * @param m  startup message; not used by this constructor.
     */
    Main(CkArgMsg* m) {
#if CMK_BLUEGENEL
      BGLPersonality bgl_p;
      int i = rts_get_personality(&bgl_p, sizeof(BGLPersonality));
#elif CMK_BLUEGENEP
      DCMF_Hardware_t bgp_hwt;
      DCMF_Hardware(&bgp_hwt);
#elif XT3_TOPOLOGY
      XT3TorusManager xt3tm;
#elif XT4_TOPOLOGY || XT5_TOPOLOGY
      XTTorusManager xttm;
#endif

      mainProxy = thishandle;
      CkPrintf("Testing TopoManager .... \n");
      TopoManager tmgr;
      CkPrintf("Torus Size [%d] [%d] [%d] [%d]\n", tmgr.getDimNX(), tmgr.getDimNY(), tmgr.getDimNZ(), tmgr.getDimNT());

#if CMK_BLUEGENEP
      // Same line again, this time with the sizes read from the hardware query.
      CkPrintf("Torus Size [%d] [%d] [%d] [%d]\n", bgp_hwt.xSize, bgp_hwt.ySize, bgp_hwt.zSize, bgp_hwt.tSize);
#endif
      int x, y, z, t;

      for(int i=0; i<CkNumPes(); i++) {
        // Coordinates according to TopoManager.
        tmgr.rankToCoordinates(i, x, y, z, t);
        CkPrintf("---- Processor %d ---> x %d y %d z %d t %d\n", i, x, y, z, t);

        // Coordinates according to the native interface, for comparison.
#if CMK_BLUEGENEL
        unsigned int tmp_t, tmp_x, tmp_y, tmp_z;
        rts_coordinatesForRank(i, &tmp_x, &tmp_y, &tmp_z, &tmp_t);
        // %u: the coordinates are declared unsigned int.
        CkPrintf("Real Processor %d ---> x %u y %u z %u t %u\n", i, tmp_x, tmp_y, tmp_z, tmp_t);
#elif CMK_BLUEGENEP
        unsigned int tmp_t, tmp_x, tmp_y, tmp_z;
    #if (DCMF_VERSION_MAJOR >= 3)
        DCMF_NetworkCoord_t nc;
        DCMF_Messager_rank2network(i, DCMF_DEFAULT_NETWORK, &nc);
        tmp_x = nc.torus.x;
        tmp_y = nc.torus.y;
        tmp_z = nc.torus.z;
        tmp_t = nc.torus.t;
    #else
        // The rank being queried is the loop index i (this call used to
        // reference an undeclared name).
        DCMF_Messager_rank2torus(i, &tmp_x, &tmp_y, &tmp_z, &tmp_t);
    #endif
        // %u: the coordinates are declared unsigned int.
        CkPrintf("Real Processor %d ---> x %u y %u z %u t %u\n", i, tmp_x, tmp_y, tmp_z, tmp_t);
#elif XT3_TOPOLOGY
        int tmp_t, tmp_x, tmp_y, tmp_z;
        xt3tm.realRankToCoordinates(i, tmp_x, tmp_y, tmp_z, tmp_t);
        CkPrintf("Real Processor %d ---> x %d y %d z %d t %d\n", i, tmp_x, tmp_y, tmp_z, tmp_t);
#elif XT4_TOPOLOGY || XT5_TOPOLOGY
        int tmp_t, tmp_x, tmp_y, tmp_z;
        xttm.realRankToCoordinates(i, tmp_x, tmp_y, tmp_z, tmp_t);
        CkPrintf("Real Processor %d ---> x %d y %d z %d t %d\n", i, tmp_x, tmp_y, tmp_z, tmp_t);
#endif
      } // end of for loop

      // Prints NX*NY*NZ, CkNumPes()/NT and the ratio CkNumPes() / (NT * NX*NY*NZ).
      int size = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
      CkPrintf("Torus Contiguity Metric %d : %d [%f] \n", size, CkNumPes()/tmgr.getDimNT(), (float)(CkNumPes())/(tmgr.getDimNT()*size) );
      CkExit();
    };
Ejemplo n.º 2
0
/**
 * Builds the chare-to-PE table for an x * y * z chare array.
 *
 * The table is stored in `mapping`, indexed as ci*Y*Z + cj*Z + ck. The
 * processor grid is taken from TopoManager when USE_TOPOMAP is set, or from
 * (tx, ty, tz) with one core per node when USE_BLOCKMAP is set. Each
 * processor receives a contiguous brick of chares.
 *
 * @param x, y, z     number of chares in each dimension (stored in X, Y, Z).
 * @param tx, ty, tz  processor grid used only by the USE_BLOCKMAP build.
 */
ComputeMap::ComputeMap(int x, int y, int z, int tx, int ty, int tz) {
  X = x;
  Y = y;
  Z = z;
  mapping = new int[X*Y*Z];

  TopoManager tmgr;
  // Default to a 1 x 1 x 1 x 1 grid so that a build with neither mapping
  // macro enabled does not divide by indeterminate values below.
  int dimX = 1, dimY = 1, dimZ = 1, dimT = 1;

#if USE_TOPOMAP
  dimX = tmgr.getDimNX();
  dimY = tmgr.getDimNY();
  dimZ = tmgr.getDimNZ();
  dimT = tmgr.getDimNT();
#elif USE_BLOCKMAP
  dimX = tx;
  dimY = ty;
  dimZ = tz;
  dimT = 1;
#endif

  // we are assuming that the no. of chares in each dimension is a
  // multiple of the torus dimension
  int numCharesPerPeX = X / dimX;
  int numCharesPerPeY = Y / dimY;
  int numCharesPerPeZ = Z / dimZ;

  if(dimT < 2) {    // one core per node
    if(CkMyPe()==0) CkPrintf("DATA: %d %d %d %d : %d %d %d\n", dimX, dimY, dimZ, dimT, numCharesPerPeX, numCharesPerPeY, numCharesPerPeZ);
    // (i, j, k) walks the processor grid; (ci, cj, ck) walks the brick of
    // chares owned by that processor.
    for(int i=0; i<dimX; i++)
      for(int j=0; j<dimY; j++)
        for(int k=0; k<dimZ; k++)
          for(int ci=i*numCharesPerPeX; ci<(i+1)*numCharesPerPeX; ci++)
            for(int cj=j*numCharesPerPeY; cj<(j+1)*numCharesPerPeY; cj++)
              for(int ck=k*numCharesPerPeZ; ck<(k+1)*numCharesPerPeZ; ck++) {
#if USE_TOPOMAP
                mapping[ci*Y*Z + cj*Z + ck] = tmgr.coordinatesToRank(i, j, k);
#elif USE_BLOCKMAP
                mapping[ci*Y*Z + cj*Z + ck] = i + j*dimX + k*dimX*dimY;
#endif
              }
  } else {          // multiple cores per node
    // In this case, we split the chares in the X dimension among the
    // cores on the same node.
    numCharesPerPeX /= dimT;
    if(CkMyPe()==0) CkPrintf("%d %d %d : %d %d %d %d : %d %d %d \n", x, y, z, dimX, dimY, dimZ, dimT, numCharesPerPeX, numCharesPerPeY, numCharesPerPeZ);
    for(int i=0; i<dimX; i++)
      for(int j=0; j<dimY; j++)
        for(int k=0; k<dimZ; k++)
          for(int l=0; l<dimT; l++)
            // Core l of node (i, j, k) takes the l-th X-slab of that node's brick.
            for(int ci=(dimT*i+l)*numCharesPerPeX; ci<(dimT*i+l+1)*numCharesPerPeX; ci++)
              for(int cj=j*numCharesPerPeY; cj<(j+1)*numCharesPerPeY; cj++)
                for(int ck=k*numCharesPerPeZ; ck<(k+1)*numCharesPerPeZ; ck++) {
                  mapping[ci*Y*Z + cj*Z + ck] = tmgr.coordinatesToRank(i, j, k, l);
                }
  }
}
Ejemplo n.º 3
0
/**
 * Builds the sender, receiver and print maps from the file "<file>.map".
 *
 * All three maps are first reset to -1. Each line of the file is expected to
 * hold six integers c1..c6; for every z in [0, dimNZ) the rank at
 * (c1, c2, z, 0) is recorded as sending to the rank at (c4, c5, z, 0). For
 * z == 0 the pair is also flagged in pmap (1 for the sender, 2 for the
 * receiver). c3 and c6 are parsed but not used. Lines that do not contain six
 * integers, and pairs whose ranks fall outside [0, size), are skipped.
 *
 * @param size  number of entries in each of smap, rmap and pmap.
 * @param smap  out: smap[sender] = receiver rank, -1 when not a sender.
 * @param rmap  out: rmap[receiver] = sender rank, -1 when not a receiver.
 * @param pmap  out: 1 / 2 for the z == 0 sender / receiver, -1 otherwise.
 * @param file  number used to form the map file name.
 */
void build_process_map(int size, int *smap, int *rmap, int *pmap, int file)
{
  TopoManager tmgr;
  const int dimNZ = tmgr.getDimNZ();

  for (int i = 0; i < size; i++) {
    smap[i] = -1;
    rmap[i] = -1;
    pmap[i] = -1;
  }

  std::cout << "Loading Map" << std::endl;
  char name[50];
  snprintf(name, sizeof(name), "%d.map", file);
  std::ifstream mapFile(name);
  if (!mapFile) {
    std::cerr << "Could not open map file " << name << std::endl;
  }

  std::string line_s;
  // Loop on the read itself: testing good() before getline() ran the body one
  // extra time after the last line, with values that were never parsed.
  while (std::getline(mapFile, line_s)) {
    #ifdef DEBUG
      std::cout << "     >  Loading " << name << std::endl;
    #endif
    int c1, c2, c3, c4, c5, c6;
    std::istringstream line(line_s);
    if (!(line >> c1 >> c2 >> c3 >> c4 >> c5 >> c6)) {
      continue;   // blank or malformed line
    }

    for (int i = 0; i < dimNZ; i++) {
      const int pe  = tmgr.coordinatesToRank(c1, c2, i, 0);
      const int pe1 = tmgr.coordinatesToRank(c4, c5, i, 0);
      // Never index the maps with a rank they cannot hold.
      if (pe < 0 || pe >= size || pe1 < 0 || pe1 >= size) {
        continue;
      }
      smap[pe] = pe1;
      rmap[pe1] = pe;
      if (i == 0) {
        pmap[pe] = 1;
        pmap[pe1] = 2;
      }
    }
  }
  dump_map(size, rmap);
  dump_map(size, smap);
}
Ejemplo n.º 4
0
int main(int argc, char *argv[]) {
  int numprocs, myrank;
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
  double sendTime, recTime, min, avg, max;
  double time[3] = {0.0, 0.0, 0.0};
  int msg_size;
  MPI_Status mstat;
  int i=0,j, pe, pe1, pe2, trial, hops;
  char name[30];
  char locname[30];
  char blockname[50];
  double newTime, oldTime;
  double storeTime[NUM_MSGS];
  double recvTime[NUM_MSGS];
  double storeBw[NUM_MSGS];
  char *send_buf = (char *)malloc(MAX_MSG_SIZE);
  char *recv_buf = (char *)malloc(MAX_MSG_SIZE);
  FILE *locf;
  for(i = 0; i < MAX_MSG_SIZE; i++) {
    recv_buf[i] = send_buf[i] = (char) (i & 0xff);
  }

  // allocate the routing map.
  int *rmap = (int *) malloc(sizeof(int) * numprocs);
  int *smap = (int *) malloc(sizeof(int) * numprocs);
  
  TopoManager *tmgr;
  int dimNZ, numRG, x, y, z, t, bcastSend[3], bcastRecv[3];

  if(myrank == 0) {
    tmgr = new TopoManager();
#if CREATE_JOBS
    numRG = tmgr->getDimNX() * (tmgr->getDimNY() - 2) * 2 * tmgr->getDimNT();
#else
    numRG = tmgr->getDimNX() * tmgr->getDimNY() * 2;
#endif
    dimNZ = tmgr->getDimNZ();
    for (int i=1; i<numprocs; i++) {
      bcastSend[0] = dimNZ;
      bcastSend[1] = numRG;
      tmgr->rankToCoordinates(i, x, y, z, t);
      bcastSend[2] = z;
      MPI_Send(bcastSend, 3, MPI_INT, i, 1, MPI_COMM_WORLD);
    }
    tmgr->rankToCoordinates(0, x, y, z, t);
  } else {
      MPI_Recv(bcastRecv, 3, MPI_INT, 0, 1, MPI_COMM_WORLD, &mstat);
      dimNZ = bcastRecv[0];
      numRG = bcastRecv[1];
      z = bcastRecv[2];
  }
  MPI_Barrier(MPI_COMM_WORLD);
  if (myrank == 0) {
    printf("Torus Dimensions %d %d %d %d\n", tmgr->getDimNX(), tmgr->getDimNY(), dimNZ, tmgr->getDimNT());
  }
#if USE_HPM
    HPM_Init();
#endif
  for (hops=0; hops < 1; hops++) {
    // To print the recv times for certain ranks
    int *pmap = (int *) malloc(sizeof(int) * numprocs);
    if (myrank == 0) {
      // Rank 0 makes up a routing map.
      build_process_map(numprocs, smap, rmap, pmap, 2);
    }
    // Broadcast the routing map.
    MPI_Bcast(smap, numprocs, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(rmap, numprocs, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(pmap, numprocs, MPI_INT, 0, MPI_COMM_WORLD);
    sprintf(blockname, "Block_%d.hpm",hops);
    if (myrank == 0) {
       printf( " Broadcasted the map \n");
    }
#if USE_HPM
    HPM_Start(blockname);
#endif
#if CREATE_JOBS
    sprintf(name, "xt4_job_%d_%d.dat", numprocs, hops);
#else
    sprintf(name, "bgp_line_%d_%d.dat", numprocs, hops);
#endif
	if(pmap[myrank]>0)
	{
	sprintf(locname, "bgp_print_%d.dat", myrank);
	locf = fopen(locname, "a");
   	}
    for (msg_size=MIN_MSG_SIZE; msg_size<=MAX_MSG_SIZE; msg_size=(msg_size<<1)) {
      for (trial=0; trial<1; trial++) {
	 if (myrank == 0) {
	     printf( " Going to begin the trial \n");
	  }
	pe1 = smap[myrank]; // Am I a sender?
	pe2 = rmap[myrank]; // Am I a reciever? 
	MPI_Barrier(MPI_COMM_WORLD);
	// Actual Data Transfer
	if(pe1 != -1) {
	    sendTime = MPI_Wtime();
	    oldTime = sendTime;
	    j=0;
	    for(i=0; i<NUM_MSGS; i++)
	    {
		  storeTime[i] = MPI_Wtime(); // Just before the next send operation
		  MPI_Send(send_buf, msg_size, MPI_CHAR, pe1, 1, MPI_COMM_WORLD);
	    }
	    MPI_Recv(recv_buf, msg_size, MPI_CHAR, pe1, 1, MPI_COMM_WORLD, &mstat);
	    recTime = (MPI_Wtime() - sendTime) / (NUM_MSGS+1);
	    //printf(" My Rank : %d Experiment: %d  MSG_SIZE: %d -- Completed send recv \n", myrank, hops, msg_size);
	  }
	if(pe2 !=1)
	{
	    sendTime = MPI_Wtime();
	    oldTime = sendTime;
	    j=0;
	    for(i=0; i<NUM_MSGS; i++)
	      {
		  MPI_Recv(recv_buf, msg_size, MPI_CHAR, pe2, 1, MPI_COMM_WORLD, &mstat);
		  recvTime[i] = MPI_Wtime(); // Just after the next recv operation
	      }	  
	    MPI_Send(send_buf, msg_size, MPI_CHAR, pe2, 1, MPI_COMM_WORLD);
	    recTime = (MPI_Wtime() - sendTime) / (NUM_MSGS+1);
        }
	// Recv times sent back to the Senders for b/w calculations 
	if(myrank==0)
	{
	  printf(" My Rank : %d Experiment: %d  MSG_SIZE: %d -- Reached barrier in middle \n", myrank, hops, msg_size);
	}
	pe1 = smap[myrank]; // Am I a sender?
	pe2 = rmap[myrank]; // Am I a reciever? 
	MPI_Barrier(MPI_COMM_WORLD);
	if(pe1 != -1) {
	    MPI_Recv(recvTime, NUM_MSGS, MPI_DOUBLE, pe1, 1, MPI_COMM_WORLD, &mstat);
	    if(pmap[myrank]==1)
	    {
	      printf(" My Rank : %d Hops: %d  MSG_SIZE: %d Sender Side Exp trial: %d   Avg recv time %g \n", myrank, hops, msg_size, trial, recTime );
	      //printf(" My Rank : %d Hops: %d  MSG_SIZE: %d Sender Side Exp trial: %d   Recv time %g \n", myrank, hops, msg_size, trial, recvTime );
	      for(i=0;i<NUM_MSGS; i++)
		{
		  storeBw[i]= msg_size/(recvTime[i] - storeTime[i]);
		  fprintf(locf,  "%d   %d   %d   %g   %g   %g   %g \n", hops, myrank, msg_size, 500000*(storeTime[i]+recvTime[i]), storeBw[i],1000000*recvTime[i],1000000*storeTime[i]); 
		}
	    }
	  }
	if(pe2 !=1) 
	    {
	      MPI_Send(recvTime, NUM_MSGS, MPI_DOUBLE, pe2, 1, MPI_COMM_WORLD);
	    }
      } // end for loop of trials
    } // end for loop of msgs
    if(pmap[myrank]>0)
	{
  		fflush(NULL);
		fclose(locf);
	}
    free(pmap);
#if USE_HPM
  HPM_Stop(blockname);
#endif
  } // end for loop of hops
#if USE_HPM
  HPM_Print();
#endif
  if(myrank == 0)
    printf("Program Complete\n");
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 5
0
/**
 * Builds the partner map for one benchmark step and the list of ranks that
 * form the reporting group.
 *
 * With CREATE_JOBS: every entry is reset to -1; ranks on plane k == 2 are
 * paired with plane k == dimNZ-3 (inner j/k range only) and recorded in mapRG;
 * when dist == 1 the ranks on the outer j/k faces are additionally paired with
 * the rank whose j and k are swapped (the two corners are paired with each
 * other).
 * Without CREATE_JOBS: rank (i, j, k, l) is paired with (i, j, dimNZ-1-k, l)
 * when |dimNZ-1-2k| <= 2*dist+1, otherwise it gets -1; ranks on the two middle
 * planes are recorded in mapRG.
 *
 * @param size   number of entries in map.
 * @param map    out: map[rank] = partner rank, or -1.
 * @param dist   step index selecting which pairs are active.
 * @param numRG  capacity of mapRG; also printed next to the number of ranks found.
 * @param mapRG  out: ranks belonging to the reporting group.
 */
void build_process_map(int size, int *map, int dist, int numRG, int *mapRG)
{
  TopoManager tmgr;
  int pe1, pe2;
  int dimNX, dimNY, dimNZ, dimNT;

  dimNX = tmgr.getDimNX();
  dimNY = tmgr.getDimNY();
  dimNZ = tmgr.getDimNZ();
  dimNT = tmgr.getDimNT();

  // Number of group members found; may exceed numRG if the torus does not
  // have the assumed shape, in which case the surplus is counted but not stored.
  int count = 0;

#if CREATE_JOBS
  for(int i=0; i<size; i++)
    map[i] = -1;

  // assumes a cubic partition such as 8 x 8 x 8
  // inner brick is always used
  for(int i=0; i<dimNX; i++)
    for(int j=1; j<dimNY-1; j++)
      for(int k=1; k<dimNZ-1; k++)
        for(int l=0; l<dimNT; l++) {
          if(k == 2 || k == dimNZ-3) {
            pe1 = tmgr.coordinatesToRank(i, j, k, l);
            if(k == 2)
              pe2 = tmgr.coordinatesToRank(i, j, dimNZ-3, l);
            else
              pe2 = tmgr.coordinatesToRank(i, j, 2, l);
            map[pe1] = pe2;
            if(count < numRG)
              mapRG[count] = pe1;
            count++;
            printf("%d ", pe1);
          }
        }

  printf("\n");
  if(dist == 1) {
    // outer brick is used only when dist == 1
    for(int i=0; i<dimNX; i++)
      for(int j=0; j<dimNY; j++)
        for(int k=0; k<dimNZ; k++)
          for(int l=0; l<dimNT; l++) {
            if(j == 0 || j == dimNY-1 || k == 0 || k == dimNZ-1) {
              pe1 = tmgr.coordinatesToRank(i, j, k, l);
              pe2 = tmgr.coordinatesToRank(i, k, j, l);
              // The two corners would map to themselves under the swap,
              // so pair them with each other instead.
              if(j == 0 && k == 0)
                pe2 = tmgr.coordinatesToRank(i, dimNY-1, dimNZ-1, l);
              else if(j == dimNY-1 && k == dimNZ-1)
                pe2 = tmgr.coordinatesToRank(i, 0, 0, l);
              map[pe1] = pe2;
            }
          }
  }
#else
  for(int i=0; i<dimNX; i++)
    for(int j=0; j<dimNY; j++)
      for(int k=0; k<dimNZ; k++)
        for(int l=0; l<dimNT; l++) {
          pe1 = tmgr.coordinatesToRank(i, j, k, l);
          if( abs(dimNZ - 1 - 2*k) <= (2*dist+1) ) {
            pe2 = tmgr.coordinatesToRank(i, j, (dimNZ-1-k), l);
            map[pe1] = pe2;

            if(i==0 && j==0 && l==0) {
              printf("Hops %d [%d] [%d]\n", 2*dist+1, pe1, pe2);
            }

            if(k == dimNZ/2-1 || k == dimNZ/2) {
              if(count < numRG)
                mapRG[count] = pe1;
              count++;
            }
          } else
            map[pe1] = -1;
        }
#endif
  printf("Barrier Process %d %d\n", count, numRG);
  check_map(size, map);
}
Ejemplo n.º 6
0
/**
 * MPI ping-pong benchmark over pairs chosen along the torus.
 *
 * Rank 0 queries TopoManager and sends each rank the torus depth, numRG and
 * its own z coordinate. For every hop value rank 0 builds a partner map and a
 * list of reporting ranks; both are broadcast and the reporting ranks form a
 * sub-communicator. For each message size (MIN_MSG_SIZE to MAX_MSG_SIZE,
 * doubling) and each of 10 trials, paired ranks exchange NUM_MSGS messages in
 * each direction between warm-up and cool-down exchanges; the per-message time
 * is reduced (min / sum / max) over the sub-communicator and group rank 0
 * appends the per-trial averages to the output file.
 *
 * @return 0 after MPI_Finalize.
 */
int main(int argc, char *argv[]) {
  int numprocs, myrank;
  // Stays MPI_UNDEFINED if the hop loop never runs.
  int grank = MPI_UNDEFINED;
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

  MPI_Group orig_group, new_group;
  MPI_Comm new_comm;

  /* Extract the original group handle */
  MPI_Comm_group(MPI_COMM_WORLD, &orig_group);

  const int numTrials = 10;
  double sendTime, recvTime;
  double min = 0.0, avg = 0.0, max = 0.0;
  double time[3] = {0.0, 0.0, 0.0};
  int msg_size;
  MPI_Status mstat;
  int i=0, pe, trial, hops;
  char name[30];

  char *send_buf = (char *)malloc(MAX_MSG_SIZE);
  char *recv_buf = (char *)malloc(MAX_MSG_SIZE);

  for(i = 0; i < MAX_MSG_SIZE; i++) {
    recv_buf[i] = send_buf[i] = (char) (i & 0xff);
  }

  // allocate the routing map.
  int *map = (int *) malloc(sizeof(int) * numprocs);
  // Only rank 0 creates a TopoManager; it stays NULL elsewhere.
  TopoManager *tmgr = NULL;
  int dimNZ, numRG, x, y, z, t, bcastSend[3], bcastRecv[3];

  if(myrank == 0) {
    tmgr = new TopoManager();
#if CREATE_JOBS
    numRG = tmgr->getDimNX() * (tmgr->getDimNY() - 2) * 2 * tmgr->getDimNT();
#else
    numRG = tmgr->getDimNX() * tmgr->getDimNY() * 2 * tmgr->getDimNT();
#endif
    dimNZ = tmgr->getDimNZ();
    // Hand every other rank the torus depth, numRG and its own z coordinate.
    for (int i=1; i<numprocs; i++) {
      bcastSend[0] = dimNZ;
      bcastSend[1] = numRG;
      tmgr->rankToCoordinates(i, x, y, z, t);
      bcastSend[2] = z;
      MPI_Send(bcastSend, 3, MPI_INT, i, 1, MPI_COMM_WORLD);
    }
    tmgr->rankToCoordinates(0, x, y, z, t);
  } else {
      MPI_Recv(bcastRecv, 3, MPI_INT, 0, 1, MPI_COMM_WORLD, &mstat);
      dimNZ = bcastRecv[0];
      numRG = bcastRecv[1];
      z = bcastRecv[2];
  }

  MPI_Barrier(MPI_COMM_WORLD);

  if (myrank == 0) {
    printf("Torus Dimensions %d %d %d %d\n", tmgr->getDimNX(), tmgr->getDimNY(), dimNZ, tmgr->getDimNT());
  }

#if CREATE_JOBS
  for (hops=0; hops < 2; hops++) {
#else
  for (hops=0; hops < dimNZ/2; hops++) {
#endif
    int *mapRG = (int *) malloc(sizeof(int) * numRG);
    if (myrank == 0) {
      // Rank 0 makes up a routing map.
      build_process_map(numprocs, map, hops, numRG, mapRG);
    }

    // Broadcast the routing map.
    MPI_Bcast(map, numprocs, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(mapRG, numRG, MPI_INT, 0, MPI_COMM_WORLD);

    // Sub-communicator of the reporting ranks; grank is MPI_UNDEFINED on
    // ranks that are not part of it.
    MPI_Group_incl(orig_group, numRG, mapRG, &new_group);
    MPI_Comm_create(MPI_COMM_WORLD, new_group, &new_comm);
    MPI_Group_rank(new_group, &grank);

#if CREATE_JOBS
    sprintf(name, "xt4_job_%d_%d.dat", numprocs, hops);
#else
    sprintf(name, "xt4_line_%d_%d.dat", numprocs, hops);
#endif

    for (msg_size=MIN_MSG_SIZE; msg_size<=MAX_MSG_SIZE; msg_size=(msg_size<<1)) {
      for (trial=0; trial<numTrials; trial++) {

        pe = map[myrank];
        if(pe != -1) {
          if(grank != MPI_UNDEFINED) MPI_Barrier(new_comm);

          // The lower rank of each pair sends first, the higher rank
          // receives first, so the blocking calls always match up.
          if(myrank < pe) {
            // warmup
            for(i=0; i<2; i++) {
              MPI_Send(send_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD);
              MPI_Recv(recv_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD, &mstat);
            }

            sendTime = MPI_Wtime();
            for(i=0; i<NUM_MSGS; i++)
              MPI_Send(send_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD);
            for(i=0; i<NUM_MSGS; i++)
              MPI_Recv(recv_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD, &mstat);
            recvTime = (MPI_Wtime() - sendTime) / NUM_MSGS;

            // cooldown
            for(i=0; i<2; i++) {
              MPI_Send(send_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD);
              MPI_Recv(recv_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD, &mstat);
            }

            if(grank != MPI_UNDEFINED) MPI_Barrier(new_comm);
          } else {
            // warmup
            for(i=0; i<2; i++) {
              MPI_Recv(recv_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD, &mstat);
              MPI_Send(send_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD);
            }

            sendTime = MPI_Wtime();
            for(i=0; i<NUM_MSGS; i++)
              MPI_Recv(recv_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD, &mstat);
            for(i=0; i<NUM_MSGS; i++)
              MPI_Send(send_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD);
            recvTime = (MPI_Wtime() - sendTime) / NUM_MSGS;

            // cooldown
            for(i=0; i<2; i++) {
              MPI_Recv(recv_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD, &mstat);
              MPI_Send(send_buf, msg_size, MPI_CHAR, pe, 1, MPI_COMM_WORLD);
            }

            if(grank != MPI_UNDEFINED) MPI_Barrier(new_comm);
          }

          if(grank != MPI_UNDEFINED) {
            MPI_Allreduce(&recvTime, &min, 1, MPI_DOUBLE, MPI_MIN, new_comm);
            MPI_Allreduce(&recvTime, &avg, 1, MPI_DOUBLE, MPI_SUM, new_comm);
            MPI_Allreduce(&recvTime, &max, 1, MPI_DOUBLE, MPI_MAX, new_comm);
            // Turn the sum into a mean; only ranks that took part in the
            // reduction hold a value to divide.
            avg /= numRG;
          }

        } // end if map[pe] != -1
        if(grank == 0) {
          time[0] += min;
          time[1] += avg;
          time[2] += max;
        }
      } // end for loop of trials
      if (grank == 0) {
        FILE *outf = fopen(name, "a");
        if (outf != NULL) {
          fprintf(outf, "%d %g %g %g\n", msg_size, time[0]/numTrials, time[1]/numTrials, time[2]/numTrials);
          fflush(NULL);
          fclose(outf);
        } else {
          fprintf(stderr, "Could not open %s\n", name);
        }
        time[0] = time[1] = time[2] = 0.0;
      }
    } // end for loop of msgs

    // A new group and communicator are created on every hop; release them
    // here. Ranks outside the group hold MPI_COMM_NULL, which must not be freed.
    if (new_comm != MPI_COMM_NULL)
      MPI_Comm_free(&new_comm);
    MPI_Group_free(&new_group);
    free(mapRG);
  } // end for loop of hops

  if(grank == 0)
    printf("Program Complete\n");

  // Release everything allocated above.
  MPI_Group_free(&orig_group);
  free(send_buf);
  free(recv_buf);
  free(map);
  delete tmgr;

  MPI_Finalize();
  return 0;
}