// Perform one iteration of work // The first step is to send the local state to the neighbors void begin_iteration(void) { itercnt++; //ckout << "iter=" << itercnt << ":(" << thisIndex.x << "," << thisIndex.y << ") is on " << CkMyPe() << endl; // Copy left column and right column into temporary arrays double *left_edge = new double[block_height]; double *right_edge = new double[block_height]; for(int i=0;i<block_height;++i){ left_edge[i] = temperature[i+1][1]; right_edge[i] = temperature[i+1][block_width]; } // Send my left edge thisProxy(wrap_x(thisIndex.x-1), thisIndex.y).ghostsFromRight(block_height, left_edge); // Send my right edge thisProxy(wrap_x(thisIndex.x+1), thisIndex.y).ghostsFromLeft(block_height, right_edge); // Send my top edge thisProxy(thisIndex.x, wrap_y(thisIndex.y-1)).ghostsFromBottom(block_width, &temperature[1][1]); // Send my bottom edge thisProxy(thisIndex.x, wrap_y(thisIndex.y+1)).ghostsFromTop(block_width, &temperature[block_height][1]); hasSent=true; check_and_compute(); delete [] right_edge; delete [] left_edge; }
// Send ghost faces to the six neighbors void begin_iteration(void) { if (thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0) { // CkPrintf("Start of iteration %d\n", iterations); if(iterations % PRINT_FREQ == 0) { average = timing; timing = CmiWallTimer(); average = (timing - average)/(double)PRINT_FREQ; CkPrintf("time=%.2f it=%d avg=%.4f\n",timing,iterations,average); } } iterations++; // Copy different faces into messages ghostMsg *leftMsg = new (blockDimY*blockDimZ) ghostMsg(RIGHT, blockDimY, blockDimZ); ghostMsg *rightMsg = new (blockDimY*blockDimZ) ghostMsg(LEFT, blockDimY, blockDimZ); ghostMsg *topMsg = new (blockDimX*blockDimZ) ghostMsg(BOTTOM, blockDimX, blockDimZ); ghostMsg *bottomMsg = new (blockDimX*blockDimZ) ghostMsg(TOP, blockDimX, blockDimZ); ghostMsg *frontMsg = new (blockDimX*blockDimY) ghostMsg(BACK, blockDimX, blockDimY); ghostMsg *backMsg = new (blockDimX*blockDimY) ghostMsg(FRONT, blockDimX, blockDimY); CkSetRefNum(leftMsg, iterations); CkSetRefNum(rightMsg, iterations); CkSetRefNum(topMsg, iterations); CkSetRefNum(bottomMsg, iterations); CkSetRefNum(frontMsg, iterations); CkSetRefNum(backMsg, iterations); for(int j=0; j<blockDimY; ++j) for(int k=0; k<blockDimZ; ++k) { leftMsg->gh[k*blockDimY+j] = temperature[index(1, j+1, k+1)]; rightMsg->gh[k*blockDimY+j] = temperature[index(blockDimX, j+1, k+1)]; } for(int i=0; i<blockDimX; ++i) for(int k=0; k<blockDimZ; ++k) { topMsg->gh[k*blockDimX+i] = temperature[index(i+1, 1, k+1)]; bottomMsg->gh[k*blockDimX+i] = temperature[index(i+1, blockDimY, k+1)]; } for(int i=0; i<blockDimX; ++i) for(int j=0; j<blockDimY; ++j) { frontMsg->gh[j*blockDimX+i] = temperature[index(i+1, j+1, 1)]; backMsg->gh[j*blockDimX+i] = temperature[index(i+1, j+1, blockDimZ)]; } // Send my left face thisProxy(wrap_x(thisIndex.x-1), thisIndex.y, thisIndex.z).receiveGhosts(leftMsg); // Send my right face thisProxy(wrap_x(thisIndex.x+1), thisIndex.y, thisIndex.z).receiveGhosts(rightMsg); // Send my top face thisProxy(thisIndex.x, wrap_y(thisIndex.y-1), thisIndex.z).receiveGhosts(topMsg); // Send my bottom face thisProxy(thisIndex.x, wrap_y(thisIndex.y+1), thisIndex.z).receiveGhosts(bottomMsg); // Send my front face thisProxy(thisIndex.x, thisIndex.y, wrap_z(thisIndex.z-1)).receiveGhosts(frontMsg); // Send my back face thisProxy(thisIndex.x, thisIndex.y, wrap_z(thisIndex.z+1)).receiveGhosts(backMsg); }
int main(int argc, char **argv) { int myRank, numPes; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numPes); MPI_Comm_rank(MPI_COMM_WORLD, &myRank); MPI_Request req[4]; MPI_Status status[4]; int blockDimX, blockDimY, arrayDimX, arrayDimY; int noBarrier = 0; if (argc != 4 && argc != 6) { printf("%s [array_size] [block_size] +[no]barrier\n", argv[0]); printf("%s [array_size_X] [array_size_Y] [block_size_X] [block_size_Y] +[no]barrier\n", argv[0]); MPI_Abort(MPI_COMM_WORLD, -1); } if(argc == 4) { arrayDimY = arrayDimX = atoi(argv[1]); blockDimY = blockDimX = atoi(argv[2]); if(strcasecmp(argv[3], "+nobarrier") == 0) noBarrier = 1; else noBarrier = 0; if(noBarrier && myRank==0) printf("\nSTENCIL COMPUTATION WITH NO BARRIERS\n"); } else { arrayDimX = atoi(argv[1]); arrayDimY = atoi(argv[2]); blockDimX = atoi(argv[3]); blockDimY = atoi(argv[4]); if(strcasecmp(argv[5], "+nobarrier") == 0) noBarrier = 1; else noBarrier = 0; if(noBarrier && myRank==0) printf("\nSTENCIL COMPUTATION WITH NO BARRIERS\n"); } if (arrayDimX < blockDimX || arrayDimX % blockDimX != 0) { printf("array_size_X mod block_size_X != 0!\n"); MPI_Abort(MPI_COMM_WORLD, -1); } if (arrayDimY < blockDimY || arrayDimY % blockDimY != 0) { printf("array_size_Y mod block_size_Y != 0!\n"); MPI_Abort(MPI_COMM_WORLD, -1); } int num_blocks_x = arrayDimX / blockDimX; int num_blocks_y = arrayDimY / blockDimY; int iterations = 0, i, j; double error = 1.0, max_error = 0.0; if(myRank == 0) { printf("Running Jacobi on %d processors with (%d, %d) elements\n", numPes, num_blocks_x, num_blocks_y); printf("Array Dimensions: %d %d\n", arrayDimX, arrayDimY); printf("Block Dimensions: %d %d\n", blockDimX, blockDimY); } double *dataX = new double[blockDimX]; double *dataY = new double[blockDimY]; int myRow = myRank / num_blocks_y; int myCol = myRank % num_blocks_y; AMPI_Set_startevent(MPI_COMM_WORLD); for(; iterations < 5; iterations++) { #if DO_COMM /* Receive my right, left, bottom and top edge */ MPI_Irecv(dataX, blockDimX, MPI_DOUBLE, calc_pe(myRow, wrap_y(myCol+1)), RIGHT, MPI_COMM_WORLD, &req[RIGHT-1]); MPI_Irecv(dataX, blockDimX, MPI_DOUBLE, calc_pe(myRow, wrap_y(myCol-1)), LEFT, MPI_COMM_WORLD, &req[LEFT-1]); MPI_Irecv(dataY, blockDimY, MPI_DOUBLE, calc_pe(wrap_x(myRow+1), myCol), BOTTOM, MPI_COMM_WORLD, &req[BOTTOM-1]); MPI_Irecv(dataY, blockDimY, MPI_DOUBLE, calc_pe(wrap_x(myRow-1), myCol), TOP, MPI_COMM_WORLD, &req[TOP-1]); /* Send my left, right, top and bottom edge */ MPI_Send(dataX, blockDimX, MPI_DOUBLE, calc_pe(myRow, wrap_y(myCol-1)), RIGHT, MPI_COMM_WORLD); MPI_Send(dataX, blockDimX, MPI_DOUBLE, calc_pe(myRow, wrap_y(myCol+1)), LEFT, MPI_COMM_WORLD); MPI_Send(dataY, blockDimY, MPI_DOUBLE, calc_pe(wrap_x(myRow-1), myCol), BOTTOM, MPI_COMM_WORLD); MPI_Send(dataY, blockDimY, MPI_DOUBLE, calc_pe(wrap_x(myRow+1), myCol), TOP, MPI_COMM_WORLD); MPI_Waitall(4, req, status); #endif #if !DO_COMM printf("%d %d %zd\n", myRank, calc_pe(myRow, wrap_y(myCol-1)), sizeof(double)*blockDimX); printf("%d %d %zd\n", myRank, calc_pe(myRow, wrap_y(myCol+1)), sizeof(double)*blockDimX); printf("%d %d %zd\n", myRank, calc_pe(wrap_x(myRow-1), myCol), sizeof(double)*blockDimY); printf("%d %d %zd\n", myRank, calc_pe(wrap_x(myRow+1), myCol), sizeof(double)*blockDimY); #endif } /* end of while loop */ if(myRank == 0) { printf("Completed %d iterations\n", iterations); } MPI_Finalize(); return 0; } /* end function main */
Main(CkArgMsg* m) { if ( (m->argc != 3) && (m->argc != 7) ) { CkPrintf("%s [array_size] [block_size]\n", m->argv[0]); CkPrintf("OR %s [array_size_X] [array_size_Y] [array_size_Z] [block_size_X] [block_size_Y] [block_size_Z]\n", m->argv[0]); CkAbort("Abort"); } // set iteration counter to zero iterations = 0; // store the main proxy mainProxy = thisProxy; if(m->argc == 3) { arrayDimX = arrayDimY = arrayDimZ = atoi(m->argv[1]); blockDimX = blockDimY = blockDimZ = atoi(m->argv[2]); } else if (m->argc == 7) { arrayDimX = atoi(m->argv[1]); arrayDimY = atoi(m->argv[2]); arrayDimZ = atoi(m->argv[3]); blockDimX = atoi(m->argv[4]); blockDimY = atoi(m->argv[5]); blockDimZ = atoi(m->argv[6]); } if (arrayDimX < blockDimX || arrayDimX % blockDimX != 0) CkAbort("array_size_X % block_size_X != 0!"); if (arrayDimY < blockDimY || arrayDimY % blockDimY != 0) CkAbort("array_size_Y % block_size_Y != 0!"); if (arrayDimZ < blockDimZ || arrayDimZ % blockDimZ != 0) CkAbort("array_size_Z % block_size_Z != 0!"); num_chare_x = arrayDimX / blockDimX; num_chare_y = arrayDimY / blockDimY; num_chare_z = arrayDimZ / blockDimZ; // print info CkPrintf("\nSTENCIL COMPUTATION WITH NO BARRIERS\n"); CkPrintf("Running Jacobi on %d processors with (%d, %d, %d) chares\n", CkNumPes(), num_chare_x, num_chare_y, num_chare_z); CkPrintf("Array Dimensions: %d %d %d\n", arrayDimX, arrayDimY, arrayDimZ); CkPrintf("Block Dimensions: %d %d %d\n", blockDimX, blockDimY, blockDimZ); // Create new array of worker chares #if USE_TOPOMAP CProxy_JacobiMap map = CProxy_JacobiMap::ckNew(num_chare_x, num_chare_y, num_chare_z); CkPrintf("Topology Mapping is being done ... \n"); CkArrayOptions opts(num_chare_x, num_chare_y, num_chare_z); opts.setMap(map); array = CProxy_Jacobi::ckNew(opts); #else array = CProxy_Jacobi::ckNew(num_chare_x, num_chare_y, num_chare_z); #endif TopoManager tmgr; CkArray *jarr = array.ckLocalBranch(); int jmap[num_chare_x][num_chare_y][num_chare_z]; int hops=0, p; for(int i=0; i<num_chare_x; i++) for(int j=0; j<num_chare_y; j++) for(int k=0; k<num_chare_z; k++) { jmap[i][j][k] = jarr->procNum(CkArrayIndex3D(i, j, k)); } for(int i=0; i<num_chare_x; i++) for(int j=0; j<num_chare_y; j++) for(int k=0; k<num_chare_z; k++) { p = jmap[i][j][k]; hops += tmgr.getHopsBetweenRanks(p, jmap[wrap_x(i+1)][j][k]); hops += tmgr.getHopsBetweenRanks(p, jmap[wrap_x(i-1)][j][k]); hops += tmgr.getHopsBetweenRanks(p, jmap[i][wrap_y(j+1)][k]); hops += tmgr.getHopsBetweenRanks(p, jmap[i][wrap_y(j-1)][k]); hops += tmgr.getHopsBetweenRanks(p, jmap[i][j][wrap_z(k+1)]); hops += tmgr.getHopsBetweenRanks(p, jmap[i][j][wrap_z(k-1)]); } CkPrintf("Total Hops: %d\n", hops); #ifdef JACOBI_OPENMP CProxy_OmpInitializer ompInit = CProxy_OmpInitializer::ckNew(4); #else //Start the computation start(); #endif }
int main(int argc, char **argv) { int myRank, numPes; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numPes); MPI_Comm_rank(MPI_COMM_WORLD, &myRank); MPI_Request sreq[2], rreq[2]; int blockDimX, arrayDimX, arrayDimY; if (argc != 2 && argc != 3) { printf("%s [array_size] \n", argv[0]); printf("%s [array_size_X] [array_size_Y] \n", argv[0]); MPI_Abort(MPI_COMM_WORLD, -1); } if(argc == 2) { arrayDimY = arrayDimX = atoi(argv[1]); } else { arrayDimX = atoi(argv[1]); arrayDimY = atoi(argv[2]); } if (arrayDimX % numPes != 0) { printf("array_size_X % numPes != 0!\n"); MPI_Abort(MPI_COMM_WORLD, -1); } blockDimX = arrayDimX / numPes; int iterations = 0, i, j; double error = 1.0, max_error = 0.0; if(myRank == 0) { printf("Running Jacobi on %d processors\n", numPes); printf("Array Dimensions: %d %d\n", arrayDimX, arrayDimY); printf("Block Dimensions: %d\n", blockDimX); } double **temperature; double **new_temperature; /* allocate two dimensional arrays */ temperature = new double*[blockDimX+2]; new_temperature = new double*[blockDimX+2]; for (i=0; i<blockDimX+2; i++) { temperature[i] = new double[arrayDimY]; new_temperature[i] = new double[arrayDimY]; } for(i=0; i<blockDimX+2; i++) { for(j=0; j<arrayDimY; j++) { temperature[i][j] = 0.5; new_temperature[i][j] = 0.5; } } // boundary conditions if(myRank < numPes/2) { for(i=1; i<=blockDimX; i++) temperature[i][0] = 1.0; } if(myRank == numPes-1) { for(j=arrayDimY/2; j<arrayDimY; j++) temperature[blockDimX][j] = 0.0; } MPI_Barrier(MPI_COMM_WORLD); MPI_Pcontrol(1); startTime = MPI_Wtime(); while(/*error > 0.001 &&*/ iterations < MAX_ITER) { iterations++; /* Receive my bottom and top edge */ MPI_Irecv(&temperature[blockDimX+1][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank+1), BOTTOM, MPI_COMM_WORLD, &rreq[BOTTOM-1]); MPI_Irecv(&temperature[0][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank-1), TOP, MPI_COMM_WORLD, &rreq[TOP-1]); /* Send my top and bottom edge */ MPI_Isend(&temperature[1][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank-1), BOTTOM, MPI_COMM_WORLD, &sreq[BOTTOM-1]); MPI_Isend(&temperature[blockDimX][0], arrayDimY, MPI_DOUBLE, wrap_x(myRank+1), TOP, MPI_COMM_WORLD, &sreq[TOP-1]); MPI_Waitall(2, rreq, MPI_STATUSES_IGNORE); MPI_Waitall(2, sreq, MPI_STATUSES_IGNORE); for(i=1; i<blockDimX+1; i++) { for(j=0; j<arrayDimY; j++) { /* update my value based on the surrounding values */ new_temperature[i][j] = (temperature[i-1][j]+temperature[i+1][j]+temperature[i][wrap_y(j-1)]+temperature[i][wrap_y(j+1)]+temperature[i][j]) * 0.2; } } max_error = error = 0.0; for(i=1; i<blockDimX+1; i++) { for(j=0; j<arrayDimY; j++) { error = fabs(new_temperature[i][j] - temperature[i][j]); if(error > max_error) max_error = error; } } double **tmp; tmp = temperature; temperature = new_temperature; new_temperature = tmp; // boundary conditions if(myRank < numPes/2) { for(i=1; i<=blockDimX; i++) temperature[i][0] = 1.0; } if(myRank == numPes-1) { for(j=arrayDimY/2; j<arrayDimY; j++) temperature[blockDimX][j] = 0.0; } //if(myRank == 0) printf("Iteration %d %f %f %f\n", iterations, max_error, temperature[1][0], temperature[1][1]); MPI_Allreduce(&max_error, &error, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); } /* end of while loop */ MPI_Barrier(MPI_COMM_WORLD); MPI_Pcontrol(0); if(myRank == 0) { endTime = MPI_Wtime(); printf("Completed %d iterations\n", iterations); printf("Time elapsed: %f\n", endTime - startTime); } MPI_Finalize(); return 0; } /* end function main */