int main(int argc, char **argv) { int r,p; int n, energy, niters, px, py; int rx, ry; int north, south, west, east; int bx, by, offx, offy; /* three heat sources */ const int nsources = 3; int sources[nsources][2]; int locnsources; /* number of sources in my area */ int locsources[nsources][2]; /* sources local to my rank */ double t1, t2; int iter, i, j; double heat, rheat; int final_flag; /* initialize MPI envrionment */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &r); MPI_Comm_size(MPI_COMM_WORLD, &p); /* create shared memory communicator */ MPI_Comm shmcomm; MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm); int sr, sp; // rank and size in shmem comm MPI_Comm_size(shmcomm, &sp); MPI_Comm_rank(shmcomm, &sr); // this code works only on comm world! if(sp != p) MPI_Abort(MPI_COMM_WORLD, 1); /* argument checking and setting */ setup(r, p, argc, argv, &n, &energy, &niters, &px, &py, &final_flag); if (final_flag == 1) { MPI_Finalize(); exit(0); } /* determine my coordinates (x,y) -- r=x*a+y in the 2d processor array */ rx = r % px; ry = r / px; /* determine my four neighbors */ north = (ry - 1) * px + rx; if (ry-1 < 0) north = MPI_PROC_NULL; south = (ry + 1) * px + rx; if (ry+1 >= py) south = MPI_PROC_NULL; west = ry * px + rx - 1; if (rx-1 < 0) west = MPI_PROC_NULL; east = ry * px + rx + 1; if (rx+1 >= px) east = MPI_PROC_NULL; /* decompose the domain */ bx = n / px; /* block size in x */ by = n / py; /* block size in y */ offx = rx * bx; /* offset in x */ offy = ry * by; /* offset in y */ /* printf("%i (%i,%i) - w: %i, e: %i, n: %i, s: %i\n", r, ry,rx,west,east,north,south); */ int size = (bx+2)*(by+2); /* process-local grid (including halos (thus +2)) */ double *mem; MPI_Win win; MPI_Win_allocate_shared(2*size*sizeof(double), 1, MPI_INFO_NULL, shmcomm, &mem, &win); double *tmp; double *anew=mem; /* each rank's offset */ double *aold=mem+size; /* second half is aold! */ double *northptr, *southptr, *eastptr, *westptr; double *northptr2, *southptr2, *eastptr2, *westptr2; MPI_Aint sz; int dsp_unit; /* locate the shared memory region for each neighbor */ MPI_Win_shared_query(win, north, &sz, &dsp_unit, &northptr); MPI_Win_shared_query(win, south, &sz, &dsp_unit, &southptr); MPI_Win_shared_query(win, east, &sz, &dsp_unit, &eastptr); MPI_Win_shared_query(win, west, &sz, &dsp_unit, &westptr); northptr2 = northptr+size; southptr2 = southptr+size; eastptr2 = eastptr+size; westptr2 = westptr+size; /* initialize three heat sources */ init_sources(bx, by, offx, offy, n, nsources, sources, &locnsources, locsources); t1 = MPI_Wtime(); /* take time */ MPI_Win_lock_all(0, win); for (iter = 0; iter < niters; ++iter) { /* refresh heat sources */ for (i = 0; i < locnsources; ++i) { aold[ind(locsources[i][0],locsources[i][1])] += energy; /* heat source */ } MPI_Win_sync(win); MPI_Barrier(shmcomm); /* exchange data with neighbors */ if(north != MPI_PROC_NULL) { for(i=0; i<bx; ++i) aold[ind(i+1,0)] = northptr2[ind(i+1,by)]; /* pack loop - last valid region */ } if(south != MPI_PROC_NULL) { for(i=0; i<bx; ++i) aold[ind(i+1,by+1)] = southptr2[ind(i+1,1)]; /* pack loop */ } if(east != MPI_PROC_NULL) { for(i=0; i<by; ++i) aold[ind(bx+1,i+1)] = eastptr2[ind(1,i+1)]; /* pack loop */ } if(west != MPI_PROC_NULL) { for(i=0; i<by; ++i) aold[ind(0,i+1)] = westptr2[ind(bx,i+1)]; /* pack loop */ } /* update grid points */ update_grid(bx, by, aold, anew, &heat); /* swap working arrays */ tmp = anew; anew = aold; aold = tmp; /* optional - print image */ if (iter == niters-1) printarr_par(iter, anew, n, px, py, rx, ry, bx, by, offx, offy, shmcomm); } MPI_Win_unlock_all(win); t2 = MPI_Wtime(); /* get final heat in the system */ MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); if (!r) printf("[%i] last heat: %f time: %f\n", r, rheat, t2-t1); /* free working arrays and communication buffers */ MPI_Win_free(&win); MPI_Comm_free(&shmcomm); MPI_Finalize(); }
int main(int argc, char **argv) { MPI_Init(&argc, &argv); int r,p; MPI_Comm comm = MPI_COMM_WORLD; MPI_Comm_rank(comm, &r); MPI_Comm_size(comm, &p); int n, energy, niters; if (r==0) { // argument checking if(argc < 4) { if(!r) printf("usage: stencil_mpi <n> <energy> <niters>\n"); MPI_Finalize(); exit(1); } n = atoi(argv[1]); // nxn grid energy = atoi(argv[2]); // energy to be injected per iteration niters = atoi(argv[3]); // number of iterations // distribute arguments int args[3] = {n, energy, niters}; MPI_Bcast(args, 3, MPI_INT, 0, comm); } else { int args[3]; MPI_Bcast(args, 3, MPI_INT, 0, comm); n=args[0]; energy=args[1]; niters=args[2]; } int pdims[2]={0,0}; // compute good (rectangular) domain decomposition MPI_Dims_create(p, 2, pdims); int px = pdims[0]; int py = pdims[1]; // create Cartesian topology int periods[2] = {0,0}; MPI_Comm topocomm; MPI_Cart_create(comm, 2, pdims, periods, 0, &topocomm); // get my local x,y coordinates int coords[2]; MPI_Cart_coords(topocomm, r, 2, coords); int rx = coords[0]; int ry = coords[1]; int source, north, south, east, west; MPI_Cart_shift(topocomm, 0, 1, &west, &east); MPI_Cart_shift(topocomm, 1, 1, &north, &south); // decompose the domain int bx = n/px; // block size in x int by = n/py; // block size in y int offx = rx*bx; // offset in x int offy = ry*by; // offset in y //printf("[%i] (%i,%i) - w: %i, e: %i, n: %i, s: %i x*y: %i x %i\n", r, rx,ry,west,east,north,south, px, py); // allocate two work arrays double *aold = (double*)calloc(1,(bx+2)*(by+2)*sizeof(double)); // 1-wide halo zones! double *anew = (double*)calloc(1,(bx+2)*(by+2)*sizeof(double)); // 1-wide halo zones! double *tmp; // initialize three heat sources #define nsources 3 int sources[nsources][2] = {{n/2,n/2}, {n/3,n/3}, {n*4/5,n*8/9}}; int locnsources=0; // number of sources in my area int locsources[nsources][2]; // sources local to my rank for (int i=0; i<nsources; ++i) { // determine which sources are in my patch int locx = sources[i][0] - offx; int locy = sources[i][1] - offy; if(locx >= 0 && locx < bx && locy >= 0 && locy < by) { locsources[locnsources][0] = locx+1; // offset by halo zone locsources[locnsources][1] = locy+1; // offset by halo zone locnsources++; } } double t=-MPI_Wtime(); // take time // create north-south datatype MPI_Datatype north_south_type; MPI_Type_contiguous(bx, MPI_DOUBLE, &north_south_type); MPI_Type_commit(&north_south_type); // create east-west type MPI_Datatype east_west_type; MPI_Type_vector(by,1,bx+2,MPI_DOUBLE, &east_west_type); MPI_Type_commit(&east_west_type); double heat; // total heat in system for(int iter=0; iter<niters; ++iter) { // refresh heat sources for(int i=0; i<locnsources; ++i) { aold[ind(locsources[i][0],locsources[i][1])] += energy; // heat source } // exchange data with neighbors MPI_Request reqs[8]; MPI_Isend(&aold[ind(1,1)] /* north */, 1, north_south_type, north, 9, topocomm, &reqs[0]); MPI_Isend(&aold[ind(1,by)] /* south */, 1, north_south_type, south, 9, topocomm, &reqs[1]); MPI_Isend(&aold[ind(bx,1)] /* east */, 1, east_west_type, east, 9, topocomm, &reqs[2]); MPI_Isend(&aold[ind(1,1)] /* west */, 1, east_west_type, west, 9, topocomm, &reqs[3]); MPI_Irecv(&aold[ind(1,0)] /* north */, 1, north_south_type, north, 9, topocomm, &reqs[4]); MPI_Irecv(&aold[ind(1,by+1)] /* south */, 1, north_south_type, south, 9, topocomm, &reqs[5]); MPI_Irecv(&aold[ind(bx+1,1)] /* west */, 1, east_west_type, east, 9, topocomm, &reqs[6]); MPI_Irecv(&aold[ind(0,1)] /* east */, 1, east_west_type, west, 9, topocomm, &reqs[7]); // update inner grid points heat = 0.0; for(int i=2; i<bx; ++i) { for(int j=2; j<by; ++j) { anew[ind(i,j)] = anew[ind(i,j)]/2.0 + (aold[ind(i-1,j)] + aold[ind(i+1,j)] + aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0; heat += anew[ind(i,j)]; } } // wait for communication to complete MPI_Waitall(8, reqs, MPI_STATUS_IGNORE); // update outer grid points for(int i=2; i<bx; ++i) { // north, south -- two elements less per row (first and last) to avoid "double computation" in next loop! for(int j=1; j < by+1; j+=by-1) { anew[ind(i,j)] = anew[ind(i,j)]/2.0 + (aold[ind(i-1,j)] + aold[ind(i+1,j)] + aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0; heat += anew[ind(i,j)]; } } // update outer grid points for(int i=1; i<bx+1; i+=bx-1) { // east, west -- full columns for(int j=1; j < by+1; ++j) { anew[ind(i,j)] = anew[ind(i,j)]/2.0 + (aold[ind(i-1,j)] + aold[ind(i+1,j)] + aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0; heat += anew[ind(i,j)]; } } // swap arrays tmp=anew; anew=aold; aold=tmp; // swap arrays // optional - print image if(iter == niters-1) printarr_par(iter, anew, n, px, py, rx, ry, bx, by, offx, offy, comm); } t+=MPI_Wtime(); // get final heat in the system double rheat; MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, comm); if(!r) printf("[%i] last heat: %f time: %f\n", r, rheat, t); MPI_Finalize(); }
int main(int argc, char **argv) { int rank, size; int n, energy, niters, px, py; int north, south, west, east; int bx, by, offx, offy; /* three heat sources */ const int nsources = 3; int sources[nsources][2]; int locnsources; /* number of sources in my area */ int locsources[nsources][2]; /* sources local to my rank */ double t1, t2; int iter, i; double *aold, *anew, *tmp; double heat, rheat; int final_flag; /* initialize MPI envrionment */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); /* argument checking and setting */ setup(rank, size, argc, argv, &n, &energy, &niters, &px, &py, &final_flag); if (final_flag == 1) { MPI_Finalize(); exit(0); } /* Create a communicator with a topology */ MPI_Comm cart_comm; int dims[2] = {0,0}, periods[2] = {0,0}, coords[2]; MPI_Dims_create(size, 2, dims); MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &cart_comm); MPI_Cart_coords(cart_comm, rank, 2, coords); /* determine my four neighbors */ MPI_Cart_shift(cart_comm, 0, 1, &west, &east); MPI_Cart_shift(cart_comm, 1, 1, &north, &south); /* decompose the domain */ bx = n / px; /* block size in x */ by = n / py; /* block size in y */ offx = coords[0] * bx; /* offset in x */ offy = coords[1] * by; /* offset in y */ /* printf("%i (%i,%i) - w: %i, e: %i, n: %i, s: %i\n", rank, ry,rx,west,east,north,south); */ /* allocate working arrays & communication buffers */ MPI_Alloc_mem((bx+2)*(by+2)*sizeof(double), MPI_INFO_NULL, &aold); /* 1-wide halo zones! */ MPI_Alloc_mem((bx+2)*(by+2)*sizeof(double), MPI_INFO_NULL, &anew); /* 1-wide halo zones! */ /* initialize three heat sources */ init_sources(bx, by, offx, offy, n, nsources, sources, &locnsources, locsources); /* create north-south datatype */ MPI_Datatype north_south_type; MPI_Type_contiguous(bx, MPI_DOUBLE, &north_south_type); MPI_Type_commit(&north_south_type); /* create east-west type */ MPI_Datatype east_west_type; MPI_Type_vector(by,1,by+2,MPI_DOUBLE, &east_west_type); MPI_Type_commit(&east_west_type); t1 = MPI_Wtime(); /* take time */ for (iter = 0; iter < niters; ++iter) { /* refresh heat sources */ for (i = 0; i < locnsources; ++i) { aold[ind(locsources[i][0],locsources[i][1])] += energy; /* heat source */ } /* exchange data with neighbors */ int counts[4] = {1, 1, 1, 1}; MPI_Aint sdispls[4] = {ind(1,1), ind(1,by), ind(1,1), ind(bx,1)}; /* N, S, W, E */ MPI_Aint rdispls[4] = {ind(1,0), ind(1,by+1), ind(0,1), ind(bx+1,1)}; MPI_Datatype types[4] = {north_south_type, north_south_type, east_west_type, east_west_type}; MPI_Neighbor_alltoallw(aold, counts, sdispls, types, anew, counts, rdispls, types, cart_comm); /* update grid points */ update_grid(bx, by, aold, anew, &heat); /* swap working arrays */ tmp = anew; anew = aold; aold = tmp; /* optional - print image */ if (iter == niters-1) printarr_par(iter, anew, n, px, py, coords[0], coords[1], bx, by, offx, offy, MPI_COMM_WORLD); } t2 = MPI_Wtime(); /* free working arrays and communication buffers */ MPI_Free_mem(aold); MPI_Free_mem(anew); MPI_Type_free(&east_west_type); MPI_Type_free(&north_south_type); /* get final heat in the system */ MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); if (!rank) printf("[%i] last heat: %f time: %f\n", rank, rheat, t2-t1); MPI_Finalize(); }
int main(int argc, char **argv) { MPI_Init(&argc, &argv); int r,p; MPI_Comm comm = MPI_COMM_WORLD; MPI_Comm_rank(comm, &r); MPI_Comm_size(comm, &p); int n, energy, niters, px, py; if (r==0) { // argument checking if(argc < 6) { if(!r) printf("usage: stencil_mpi <n> <energy> <niters> <px> <py>\n"); MPI_Finalize(); exit(1); } n = atoi(argv[1]); // nxn grid energy = atoi(argv[2]); // energy to be injected per iteration niters = atoi(argv[3]); // number of iterations px=atoi(argv[4]); // 1st dim processes py=atoi(argv[5]); // 2nd dim processes if(px * py != p) MPI_Abort(comm, 1);// abort if px or py are wrong if(n % py != 0) MPI_Abort(comm, 2); // abort px needs to divide n if(n % px != 0) MPI_Abort(comm, 3); // abort py needs to divide n // distribute arguments int args[5] = {n, energy, niters, px, py}; MPI_Bcast(args, 5, MPI_INT, 0, comm); } else { int args[5]; MPI_Bcast(args, 5, MPI_INT, 0, comm); n=args[0]; energy=args[1]; niters=args[2]; px=args[3]; py=args[4]; } // determine my coordinates (x,y) -- r=x*a+y in the 2d processor array int rx = r % px; int ry = r / px; // determine my four neighbors int north = (ry-1)*px+rx; if(ry-1 < 0) north = MPI_PROC_NULL; int south = (ry+1)*px+rx; if(ry+1 >= py) south = MPI_PROC_NULL; int west= ry*px+rx-1; if(rx-1 < 0) west = MPI_PROC_NULL; int east = ry*px+rx+1; if(rx+1 >= px) east = MPI_PROC_NULL; // decompose the domain int bx = n/px; // block size in x int by = n/py; // block size in y int offx = rx*bx; // offset in x int offy = ry*by; // offset in y //printf("%i (%i,%i) - w: %i, e: %i, n: %i, s: %i\n", r, ry,rx,west,east,north,south); // allocate two work arrays double *aold = (double*)calloc(1,(bx+2)*(by+2)*sizeof(double)); // 1-wide halo zones! double *anew = (double*)calloc(1,(bx+2)*(by+2)*sizeof(double)); // 1-wide halo zones! double *tmp; // initialize three heat sources #define nsources 3 int sources[nsources][2] = {{n/2,n/2}, {n/3,n/3}, {n*4/5,n*8/9}}; int locnsources=0; // number of sources in my area int locsources[nsources][2]; // sources local to my rank for (int i=0; i<nsources; ++i) { // determine which sources are in my patch int locx = sources[i][0] - offx; int locy = sources[i][1] - offy; if(locx >= 0 && locx < bx && locy >= 0 && locy < by) { locsources[locnsources][0] = locx+1; // offset by halo zone locsources[locnsources][1] = locy+1; // offset by halo zone locnsources++; } } double t=-MPI_Wtime(); // take time // create north-south datatype MPI_Datatype north_south_type; MPI_Type_contiguous(bx, MPI_DOUBLE, &north_south_type); MPI_Type_commit(&north_south_type); // create east-west type MPI_Datatype east_west_type; MPI_Type_vector(by,1,bx+2,MPI_DOUBLE, &east_west_type); MPI_Type_commit(&east_west_type); double heat; // total heat in system for(int iter=0; iter<niters; ++iter) { // refresh heat sources for(int i=0; i<locnsources; ++i) { aold[ind(locsources[i][0],locsources[i][1])] += energy; // heat source } // exchange data with neighbors MPI_Request reqs[8]; MPI_Isend(&aold[ind(1,1)] /* north */, 1, north_south_type, north, 9, comm, &reqs[0]); MPI_Isend(&aold[ind(1,by)] /* south */, 1, north_south_type, south, 9, comm, &reqs[1]); MPI_Isend(&aold[ind(bx,1)] /* east */, 1, east_west_type, east, 9, comm, &reqs[2]); MPI_Isend(&aold[ind(1,1)] /* west */, 1, east_west_type, west, 9, comm, &reqs[3]); MPI_Irecv(&aold[ind(1,0)] /* north */, 1, north_south_type, north, 9, comm, &reqs[4]); MPI_Irecv(&aold[ind(1,by+1)] /* south */, 1, north_south_type, south, 9, comm, &reqs[5]); MPI_Irecv(&aold[ind(bx+1,1)] /* west */, 1, east_west_type, east, 9, comm, &reqs[6]); MPI_Irecv(&aold[ind(0,1)] /* east */, 1, east_west_type, west, 9, comm, &reqs[7]); MPI_Waitall(8, reqs, MPI_STATUS_IGNORE); // update grid points heat = 0.0; for(int i=1; i<bx+1; ++i) { for(int j=1; j<by+1; ++j) { anew[ind(i,j)] = anew[ind(i,j)]/2.0 + (aold[ind(i-1,j)] + aold[ind(i+1,j)] + aold[ind(i,j-1)] + aold[ind(i,j+1)])/4.0/2.0; heat += anew[ind(i,j)]; } } // swap arrays tmp=anew; anew=aold; aold=tmp; // optional - print image if(iter == niters-1) printarr_par(iter, anew, n, px, py, rx, ry, bx, by, offx, offy, comm); } t+=MPI_Wtime(); MPI_Type_free(&east_west_type); MPI_Type_free(&north_south_type); // get final heat in the system double rheat; MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, comm); if(!r) printf("[%i] last heat: %f time: %f\n", r, rheat, t); MPI_Finalize(); }