Ejemplo n.º 1
0
/*
 * 2D heat stencil in which every rank keeps its two grids (anew/aold) in one
 * MPI shared-memory window and fills its halo cells by reading directly from
 * the neighbours' segments of that window.
 *
 * setup(), init_sources(), update_grid(), printarr_par() and the ind() index
 * macro are defined elsewhere in the project.
 */
int main(int argc, char **argv)
{
    int r, p;
    int n, energy, niters, px, py;

    int rx, ry;
    int north, south, west, east;
    int bx, by, offx, offy;

    /* three heat sources */
    const int nsources = 3;
    int sources[nsources][2];
    int locnsources;             /* number of sources in my area */
    int locsources[nsources][2]; /* sources local to my rank */

    double t1, t2;

    int iter, i;

    /* heat starts at 0 so the final reduction is defined even if niters == 0 */
    double heat = 0.0, rheat;

    int final_flag;

    /* initialize MPI environment */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &r);
    MPI_Comm_size(MPI_COMM_WORLD, &p);

    /* create shared memory communicator */
    MPI_Comm shmcomm;
    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);

    int sr, sp; /* rank and size in shmem comm */
    MPI_Comm_size(shmcomm, &sp);
    MPI_Comm_rank(shmcomm, &sr);

    /* neighbour ranks below are world ranks used on shmcomm, so this variant
     * only works when all processes share one shared-memory domain */
    if (sp != p) MPI_Abort(MPI_COMM_WORLD, 1);

    /* argument checking and setting */
    setup(r, p, argc, argv,
          &n, &energy, &niters, &px, &py, &final_flag);

    if (final_flag == 1) {
        MPI_Finalize();
        exit(0);
    }

    /* determine my coordinates in the px x py process grid: r = ry*px + rx */
    rx = r % px;
    ry = r / px;

    /* determine my four neighbors (MPI_PROC_NULL at the domain boundary) */
    north = (ry - 1) * px + rx; if (ry-1 < 0)   north = MPI_PROC_NULL;
    south = (ry + 1) * px + rx; if (ry+1 >= py) south = MPI_PROC_NULL;
    west = ry * px + rx - 1;    if (rx-1 < 0)   west = MPI_PROC_NULL;
    east = ry * px + rx + 1;    if (rx+1 >= px) east = MPI_PROC_NULL;

    /* decompose the domain */
    bx = n / px;    /* block size in x */
    by = n / py;    /* block size in y */
    offx = rx * bx; /* offset in x */
    offy = ry * by; /* offset in y */

    /* printf("%i (%i,%i) - w: %i, e: %i, n: %i, s: %i\n", r, ry,rx,west,east,north,south); */

    int size = (bx+2)*(by+2); /* process-local grid (including halos (thus +2)) */
    double *mem;
    MPI_Win win;
    MPI_Win_allocate_shared(2*size*sizeof(double), 1, MPI_INFO_NULL, shmcomm, &mem, &win);

    double *tmp;
    double *anew = mem;        /* first half of my segment */
    double *aold = mem + size; /* second half of my segment */

    /* xxxptr  tracks the neighbour's current anew buffer,
     * xxxptr2 tracks the neighbour's current aold buffer.
     * For an MPI_PROC_NULL neighbour the query still returns some segment;
     * it is never dereferenced because every halo copy below is guarded. */
    double *northptr, *southptr, *eastptr, *westptr;
    double *northptr2, *southptr2, *eastptr2, *westptr2;
    MPI_Aint sz;
    int dsp_unit;
    /* locate the shared memory region for each neighbor */
    MPI_Win_shared_query(win, north, &sz, &dsp_unit, &northptr);
    MPI_Win_shared_query(win, south, &sz, &dsp_unit, &southptr);
    MPI_Win_shared_query(win, east, &sz, &dsp_unit, &eastptr);
    MPI_Win_shared_query(win, west, &sz, &dsp_unit, &westptr);
    northptr2 = northptr + size;
    southptr2 = southptr + size;
    eastptr2 = eastptr + size;
    westptr2 = westptr + size;

    /* initialize three heat sources */
    init_sources(bx, by, offx, offy, n,
                 nsources, sources, &locnsources, locsources);

    /* open the passive-target epoch that covers all load/store accesses */
    MPI_Win_lock_all(0, win);

    /* The initial contents of the window are not specified by MPI; start both
     * grids (and the never-written boundary halos) at zero, matching the
     * calloc()-based variants of this program. Each rank clears only its own
     * segment; the first barrier in the loop orders this before remote reads. */
    for (i = 0; i < 2*size; ++i) {
        mem[i] = 0.0;
    }

    t1 = MPI_Wtime(); /* take time */

    for (iter = 0; iter < niters; ++iter) {
        /* refresh heat sources */
        for (i = 0; i < locnsources; ++i) {
            aold[ind(locsources[i][0],locsources[i][1])] += energy; /* heat source */
        }

        /* make my stores visible, wait until every rank has done the same,
         * then sync again so the loads below observe the neighbours' stores */
        MPI_Win_sync(win);
        MPI_Barrier(shmcomm);
        MPI_Win_sync(win);

        /* exchange data with neighbors: copy their edge cells into my halo */
        if (north != MPI_PROC_NULL) {
            for (i = 0; i < bx; ++i) aold[ind(i+1,0)] = northptr2[ind(i+1,by)]; /* their last valid row */
        }
        if (south != MPI_PROC_NULL) {
            for (i = 0; i < bx; ++i) aold[ind(i+1,by+1)] = southptr2[ind(i+1,1)]; /* their first valid row */
        }
        if (east != MPI_PROC_NULL) {
            for (i = 0; i < by; ++i) aold[ind(bx+1,i+1)] = eastptr2[ind(1,i+1)]; /* their first valid column */
        }
        if (west != MPI_PROC_NULL) {
            for (i = 0; i < by; ++i) aold[ind(0,i+1)] = westptr2[ind(bx,i+1)]; /* their last valid column */
        }

        /* update grid points */
        update_grid(bx, by, aold, anew, &heat);

        /* swap working arrays */
        tmp = anew; anew = aold; aold = tmp;

        /* Every rank swaps its buffers each iteration, so the neighbour
         * pointers must be swapped as well; otherwise the next halo copy would
         * read the neighbours' output buffer instead of their aold. */
        tmp = northptr; northptr = northptr2; northptr2 = tmp;
        tmp = southptr; southptr = southptr2; southptr2 = tmp;
        tmp = eastptr;  eastptr = eastptr2;   eastptr2 = tmp;
        tmp = westptr;  westptr = westptr2;   westptr2 = tmp;

        /* optional - print image */
        if (iter == niters-1) printarr_par(iter, anew, n, px, py, rx, ry, bx, by, offx, offy, shmcomm);
    }

    MPI_Win_unlock_all(win);
    t2 = MPI_Wtime();

    /* get final heat in the system */
    MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    if (!r) printf("[%i] last heat: %f time: %f\n", r, rheat, t2-t1);

    /* free the shared window (and with it both grids) and the communicator */
    MPI_Win_free(&win);
    MPI_Comm_free(&shmcomm);

    MPI_Finalize();
}
int main(int argc, char **argv) {

  MPI_Init(&argc, &argv); 
  int r,p;
  MPI_Comm comm = MPI_COMM_WORLD;
  MPI_Comm_rank(comm, &r);
  MPI_Comm_size(comm, &p);
  int n, energy, niters;

  if (r==0) {
      // argument checking
      if(argc < 4) {
          if(!r) printf("usage: stencil_mpi <n> <energy> <niters>\n");
          MPI_Finalize();
          exit(1);
      }

      n = atoi(argv[1]); // nxn grid
      energy = atoi(argv[2]); // energy to be injected per iteration
      niters = atoi(argv[3]); // number of iterations
      
      // distribute arguments
      int args[3] = {n, energy, niters};
      MPI_Bcast(args, 3, MPI_INT, 0, comm);
  }
  else {
      int args[3];
      MPI_Bcast(args, 3, MPI_INT, 0, comm);
      n=args[0]; energy=args[1]; niters=args[2];
  }

  int pdims[2]={0,0};
  // compute good (rectangular) domain decomposition
  MPI_Dims_create(p, 2, pdims);
  int px = pdims[0];
  int py = pdims[1];

  // create Cartesian topology
  int periods[2] = {0,0};
  MPI_Comm topocomm;
  MPI_Cart_create(comm, 2, pdims, periods, 0, &topocomm);

  // get my local x,y coordinates
  int coords[2];
  MPI_Cart_coords(topocomm, r, 2, coords);
  int rx = coords[0];
  int ry = coords[1];

  int source, north, south, east, west;
  MPI_Cart_shift(topocomm, 0, 1, &west, &east);
  MPI_Cart_shift(topocomm, 1, 1, &north, &south);

  // decompose the domain
  int bx = n/px; // block size in x
  int by = n/py; // block size in y
  int offx = rx*bx; // offset in x
  int offy = ry*by; // offset in y

  //printf("[%i] (%i,%i) - w: %i, e: %i, n: %i, s: %i x*y: %i x %i\n", r, rx,ry,west,east,north,south, px, py);

  // allocate two work arrays
  double *aold = (double*)calloc(1,(bx+2)*(by+2)*sizeof(double)); // 1-wide halo zones!
  double *anew = (double*)calloc(1,(bx+2)*(by+2)*sizeof(double)); // 1-wide halo zones!
  double *tmp;

  // initialize three heat sources
  #define nsources 3
  int sources[nsources][2] = {{n/2,n/2}, {n/3,n/3}, {n*4/5,n*8/9}};
  int locnsources=0; // number of sources in my area
  int locsources[nsources][2]; // sources local to my rank
  for (int i=0; i<nsources; ++i) { // determine which sources are in my patch
    int locx = sources[i][0] - offx;
    int locy = sources[i][1] - offy;
    if(locx >= 0 && locx < bx && locy >= 0 && locy < by) {
      locsources[locnsources][0] = locx+1; // offset by halo zone
      locsources[locnsources][1] = locy+1; // offset by halo zone
      locnsources++;
    }
  }

  double t=-MPI_Wtime(); // take time
  // create north-south datatype
  MPI_Datatype north_south_type;
  MPI_Type_contiguous(bx, MPI_DOUBLE, &north_south_type);
  MPI_Type_commit(&north_south_type);
  // create east-west type
  MPI_Datatype east_west_type;
  MPI_Type_vector(by,1,bx+2,MPI_DOUBLE, &east_west_type);
  MPI_Type_commit(&east_west_type);

  double heat; // total heat in system
  for(int iter=0; iter<niters; ++iter) {
    // refresh heat sources
    for(int i=0; i<locnsources; ++i) {
      aold[ind(locsources[i][0],locsources[i][1])] += energy; // heat source
    }

    // exchange data with neighbors
    MPI_Request reqs[8];
    MPI_Isend(&aold[ind(1,1)] /* north */, 1, north_south_type, north, 9, topocomm, &reqs[0]);
    MPI_Isend(&aold[ind(1,by)] /* south */, 1, north_south_type, south, 9, topocomm, &reqs[1]);
    MPI_Isend(&aold[ind(bx,1)] /* east */, 1, east_west_type, east, 9, topocomm, &reqs[2]);
    MPI_Isend(&aold[ind(1,1)] /* west */, 1, east_west_type, west, 9, topocomm, &reqs[3]);
    MPI_Irecv(&aold[ind(1,0)] /* north */, 1, north_south_type, north, 9, topocomm, &reqs[4]);
    MPI_Irecv(&aold[ind(1,by+1)] /* south */, 1, north_south_type, south, 9, topocomm, &reqs[5]);
    MPI_Irecv(&aold[ind(bx+1,1)] /* west */, 1, east_west_type, east, 9, topocomm, &reqs[6]);
    MPI_Irecv(&aold[ind(0,1)] /* east */, 1, east_west_type, west, 9, topocomm, &reqs[7]);

    // update inner grid points
    heat = 0.0;
    for(int i=2; i<bx; ++i) {
      for(int j=2; j<by; ++j) {
        anew[ind(i,j)] = anew[ind(i,j)]/2.0 + (aold[ind(i-1,j)] + aold[ind(i+1,j)] + aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0;
        heat += anew[ind(i,j)];
      }
    }

    // wait for communication to complete
    MPI_Waitall(8, reqs, MPI_STATUS_IGNORE);

    // update outer grid points
    for(int i=2; i<bx; ++i) { // north, south -- two elements less per row (first and last) to avoid "double computation" in next loop!
      for(int j=1; j < by+1; j+=by-1) {
        anew[ind(i,j)] = anew[ind(i,j)]/2.0 + (aold[ind(i-1,j)] + aold[ind(i+1,j)] + aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0;
        heat += anew[ind(i,j)];
      }
    }

    // update outer grid points
    for(int i=1; i<bx+1; i+=bx-1) { // east, west -- full columns
      for(int j=1; j < by+1; ++j) {
        anew[ind(i,j)] = anew[ind(i,j)]/2.0 + (aold[ind(i-1,j)] + aold[ind(i+1,j)] + aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0;
        heat += anew[ind(i,j)];
      }
    }

    // swap arrays
    tmp=anew; anew=aold; aold=tmp; // swap arrays
    
    // optional - print image
  	if(iter == niters-1) printarr_par(iter, anew, n, px, py, rx, ry, bx, by, offx, offy, comm);
  }
  t+=MPI_Wtime();
  
  // get final heat in the system
  double rheat;
  MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, comm);
  if(!r) printf("[%i] last heat: %f time: %f\n", r, rheat, t);

  MPI_Finalize();
}
int main(int argc, char **argv)
{
    int rank, size;
    int n, energy, niters, px, py;

    int north, south, west, east;
    int bx, by, offx, offy;

    /* three heat sources */
    const int nsources = 3;
    int sources[nsources][2];
    int locnsources;             /* number of sources in my area */
    int locsources[nsources][2]; /* sources local to my rank */

    double t1, t2;

    int iter, i;

    double *aold, *anew, *tmp;

    double heat, rheat;

    int final_flag;

    /* initialize MPI envrionment */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* argument checking and setting */
    setup(rank, size, argc, argv,
          &n, &energy, &niters, &px, &py, &final_flag);

    if (final_flag == 1) {
        MPI_Finalize();
        exit(0);
    }

    /* Create a communicator with a topology */
    MPI_Comm cart_comm;
    int dims[2] = {0,0}, periods[2] = {0,0}, coords[2];
    MPI_Dims_create(size, 2, dims);
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &cart_comm);
    MPI_Cart_coords(cart_comm, rank, 2, coords);

    /* determine my four neighbors */
    MPI_Cart_shift(cart_comm, 0, 1, &west, &east);
    MPI_Cart_shift(cart_comm, 1, 1, &north, &south);

    /* decompose the domain */
    bx = n / px;    /* block size in x */
    by = n / py;    /* block size in y */
    offx = coords[0] * bx; /* offset in x */
    offy = coords[1] * by; /* offset in y */

    /* printf("%i (%i,%i) - w: %i, e: %i, n: %i, s: %i\n", rank, ry,rx,west,east,north,south); */

    /* allocate working arrays & communication buffers */
    MPI_Alloc_mem((bx+2)*(by+2)*sizeof(double), MPI_INFO_NULL, &aold); /* 1-wide halo zones! */
    MPI_Alloc_mem((bx+2)*(by+2)*sizeof(double), MPI_INFO_NULL, &anew); /* 1-wide halo zones! */

    /* initialize three heat sources */
    init_sources(bx, by, offx, offy, n,
                 nsources, sources, &locnsources, locsources);

    /* create north-south datatype */
    MPI_Datatype north_south_type;
    MPI_Type_contiguous(bx, MPI_DOUBLE, &north_south_type);
    MPI_Type_commit(&north_south_type);

    /* create east-west type */
    MPI_Datatype east_west_type;
    MPI_Type_vector(by,1,by+2,MPI_DOUBLE, &east_west_type);
    MPI_Type_commit(&east_west_type);

    t1 = MPI_Wtime(); /* take time */

    for (iter = 0; iter < niters; ++iter) {

        /* refresh heat sources */
        for (i = 0; i < locnsources; ++i) {
            aold[ind(locsources[i][0],locsources[i][1])] += energy; /* heat source */
        }

        /* exchange data with neighbors */
        int counts[4] = {1, 1, 1, 1};
        MPI_Aint sdispls[4] = {ind(1,1), ind(1,by), ind(1,1), ind(bx,1)}; /* N, S, W, E */
        MPI_Aint rdispls[4] = {ind(1,0), ind(1,by+1), ind(0,1), ind(bx+1,1)};
        MPI_Datatype types[4] = {north_south_type, north_south_type, east_west_type, east_west_type};
        MPI_Neighbor_alltoallw(aold, counts, sdispls, types, anew, counts, rdispls, types, cart_comm);

        /* update grid points */
        update_grid(bx, by, aold, anew, &heat);

        /* swap working arrays */
        tmp = anew; anew = aold; aold = tmp;

        /* optional - print image */
        if (iter == niters-1)
            printarr_par(iter, anew, n, px, py, coords[0], coords[1],
                         bx, by, offx, offy, MPI_COMM_WORLD);
    }

    t2 = MPI_Wtime();

    /* free working arrays and communication buffers */
    MPI_Free_mem(aold);
    MPI_Free_mem(anew);

    MPI_Type_free(&east_west_type);
    MPI_Type_free(&north_south_type);

    /* get final heat in the system */
    MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    if (!rank) printf("[%i] last heat: %f time: %f\n", rank, rheat, t2-t1);

    MPI_Finalize();
}
Ejemplo n.º 4
0
int main(int argc, char **argv) {

  MPI_Init(&argc, &argv); 
  int r,p;
  MPI_Comm comm = MPI_COMM_WORLD;
  MPI_Comm_rank(comm, &r);
  MPI_Comm_size(comm, &p);
  int n, energy, niters, px, py;

   if (r==0) {
       // argument checking
       if(argc < 6) {
           if(!r) printf("usage: stencil_mpi <n> <energy> <niters> <px> <py>\n");
           MPI_Finalize();
           exit(1);
       }

       n = atoi(argv[1]); // nxn grid
       energy = atoi(argv[2]); // energy to be injected per iteration
       niters = atoi(argv[3]); // number of iterations
       px=atoi(argv[4]); // 1st dim processes
       py=atoi(argv[5]); // 2nd dim processes

       if(px * py != p) MPI_Abort(comm, 1);// abort if px or py are wrong
       if(n % py != 0) MPI_Abort(comm, 2); // abort px needs to divide n
       if(n % px != 0) MPI_Abort(comm, 3); // abort py needs to divide n
       
       // distribute arguments
       int args[5] = {n, energy, niters, px,  py};
       MPI_Bcast(args, 5, MPI_INT, 0, comm);
   }
  else {
      int args[5];
      MPI_Bcast(args, 5, MPI_INT, 0, comm);
      n=args[0]; energy=args[1]; niters=args[2]; px=args[3]; py=args[4];
  }

  // determine my coordinates (x,y) -- r=x*a+y in the 2d processor array
  int rx = r % px;
  int ry = r / px;
  // determine my four neighbors
  int north = (ry-1)*px+rx; if(ry-1 < 0)   north = MPI_PROC_NULL;
  int south = (ry+1)*px+rx; if(ry+1 >= py) south = MPI_PROC_NULL;
  int west= ry*px+rx-1;     if(rx-1 < 0)   west = MPI_PROC_NULL;
  int east = ry*px+rx+1;    if(rx+1 >= px) east = MPI_PROC_NULL;
  // decompose the domain
  int bx = n/px; // block size in x
  int by = n/py; // block size in y
  int offx = rx*bx; // offset in x
  int offy = ry*by; // offset in y

  //printf("%i (%i,%i) - w: %i, e: %i, n: %i, s: %i\n", r, ry,rx,west,east,north,south);

  // allocate two work arrays
  double *aold = (double*)calloc(1,(bx+2)*(by+2)*sizeof(double)); // 1-wide halo zones!
  double *anew = (double*)calloc(1,(bx+2)*(by+2)*sizeof(double)); // 1-wide halo zones!
  double *tmp;

  // initialize three heat sources
  #define nsources 3
  int sources[nsources][2] = {{n/2,n/2}, {n/3,n/3}, {n*4/5,n*8/9}};
  int locnsources=0; // number of sources in my area
  int locsources[nsources][2]; // sources local to my rank
  for (int i=0; i<nsources; ++i) { // determine which sources are in my patch
    int locx = sources[i][0] - offx;
    int locy = sources[i][1] - offy;
    if(locx >= 0 && locx < bx && locy >= 0 && locy < by) {
      locsources[locnsources][0] = locx+1; // offset by halo zone
      locsources[locnsources][1] = locy+1; // offset by halo zone
      locnsources++;
    }
  }

  double t=-MPI_Wtime(); // take time
  // create north-south datatype
  MPI_Datatype north_south_type;
  MPI_Type_contiguous(bx, MPI_DOUBLE, &north_south_type);
  MPI_Type_commit(&north_south_type);
  // create east-west type
  MPI_Datatype east_west_type;
  MPI_Type_vector(by,1,bx+2,MPI_DOUBLE, &east_west_type);
  MPI_Type_commit(&east_west_type);

  double heat; // total heat in system
  for(int iter=0; iter<niters; ++iter) {
    // refresh heat sources
    for(int i=0; i<locnsources; ++i) {
      aold[ind(locsources[i][0],locsources[i][1])] += energy; // heat source
    }

    // exchange data with neighbors
    MPI_Request reqs[8];
    MPI_Isend(&aold[ind(1,1)] /* north */, 1, north_south_type, north, 9, comm, &reqs[0]);
    MPI_Isend(&aold[ind(1,by)] /* south */, 1, north_south_type, south, 9, comm, &reqs[1]);
    MPI_Isend(&aold[ind(bx,1)] /* east */, 1, east_west_type, east, 9, comm, &reqs[2]);
    MPI_Isend(&aold[ind(1,1)] /* west */, 1, east_west_type, west, 9, comm, &reqs[3]);
    MPI_Irecv(&aold[ind(1,0)] /* north */, 1, north_south_type, north, 9, comm, &reqs[4]);
    MPI_Irecv(&aold[ind(1,by+1)] /* south */, 1, north_south_type, south, 9, comm, &reqs[5]);
    MPI_Irecv(&aold[ind(bx+1,1)] /* west */, 1, east_west_type, east, 9, comm, &reqs[6]);
    MPI_Irecv(&aold[ind(0,1)] /* east */, 1, east_west_type, west, 9, comm, &reqs[7]);
    MPI_Waitall(8, reqs, MPI_STATUS_IGNORE);

    // update grid points
    heat = 0.0;
    for(int i=1; i<bx+1; ++i) {
      for(int j=1; j<by+1; ++j) {
        anew[ind(i,j)] = anew[ind(i,j)]/2.0 + (aold[ind(i-1,j)] + aold[ind(i+1,j)] + aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0;
        heat += anew[ind(i,j)];
      }
    }

    // swap arrays
    tmp=anew; anew=aold; aold=tmp;

    // optional - print image
    if(iter == niters-1) printarr_par(iter, anew, n, px, py, rx, ry, bx, by, offx, offy, comm);
  }
  t+=MPI_Wtime();

  MPI_Type_free(&east_west_type);
  MPI_Type_free(&north_south_type);

  // get final heat in the system
  double rheat;
  MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, comm);
  if(!r) printf("[%i] last heat: %f time: %f\n", r, rheat, t);

  MPI_Finalize();
}