int main()
{
    MPI_Init(NULL, NULL);

    /* MPI variables */
    MPI_Comm comm, comm2d;
    MPI_Status status;
    MPI_Request request;
    int size, rank, tag;
    int disp, left, right, up, down, reorder;
    int dims[NDIMS], period[NDIMS], direction[NDIMS];

    /* variables for the program */
    int nx, ny, nxp, nyp, nxpe, nype;
    int i, j, iter;
    int lastcheck, checkinc;
    double max, delta;
    double avg, mean;

    char picName[20] = "edgeCHANGETHIS.pgm";

    /*
     * find the size of the image so the arrays can be defined
     */
    pgmsize(picName, &nx, &ny);

    comm = MPI_COMM_WORLD;
    MPI_Comm_size(comm, &size);
    tag = 1;

    /* Introduce Cartesian topology */
    for(i=0; i<NDIMS; ++i)
    {
        dims[i] = 0;
        period[i] = FALSE;      /* TRUE gives cyclic boundaries */
        direction[i] = i;       /* shift along the same index as the element of the array */
    }
    reorder = TRUE;             /* allow the processes to be reordered to hopefully improve efficiency */
    disp = 1;                   /* shift by 1 */

    MPI_Dims_create(size, NDIMS, dims);
    MPI_Cart_create(comm, NDIMS, dims, period, reorder, &comm2d);
    MPI_Comm_rank(comm2d, &rank);
    MPI_Cart_shift(comm2d, direction[1], disp, &left, &right);
    MPI_Cart_shift(comm2d, direction[0], disp, &up, &down);

    /*
     * check the array is a reasonable size to be split up among the processes
     * to be used, and if not quit
     */
    if(nx < dims[1] || ny < dims[0])
    {
        if(ROOT == rank)
        {
            printf("too many processes running on job, %d in x direction but only %d elements, %d in y, %d elements\n",
                   dims[1], nx, dims[0], ny);
        }
        return 1;
    }

    initialise_local_array_sizes(nx, ny, &nxp, &nyp, &nxpe, &nype, dims, rank, size);

    /* now declare the arrays needed (note they can be different sizes on different processes) */
    float localBuf[nxp][nyp];
    float localEdge[nxp+2][nyp+2], localOld[nxp+2][nyp+2], localNew[nxp+2][nyp+2];
    float globalImage[nx][ny];

    /*
     * set the halos of all the appropriate arrays to 255
     */
    set_halos(localEdge, localOld, localNew, nxp, nyp);

    if(ROOT == rank)
    {
        printf("Reading in picture\n");
        pgmread(picName, globalImage, nx, ny);
    }

    /* set up all the datatypes that will need to be used */

    /* send contiguous halos */
    MPI_Datatype mcols;
    MPI_Type_contiguous(nyp, MPI_FLOAT, &mcols);
    MPI_Type_commit(&mcols);

    /* send non-contiguous halos */
    MPI_Datatype mrows;
    MPI_Type_vector(nxp, 1, nyp+2, MPI_FLOAT, &mrows);   /* stride nyp+2 since it is used on (nyp+2)-sized arrays */
    MPI_Type_commit(&mrows);

    /* scatter data to processes with the same size arrays as ROOT */
    MPI_Datatype scatter[4];
    MPI_Type_vector(nxp, nyp, ny, MPI_FLOAT, &scatter[3]);
    MPI_Type_commit(&scatter[3]);

    /* scatter data to processes with different size arrays than ROOT in dim[0] */
    MPI_Type_vector(nxp, nype, ny, MPI_FLOAT, &scatter[0]);
    MPI_Type_commit(&scatter[0]);

    /* scatter data to processes with different size arrays than ROOT in dim[1] */
    MPI_Type_vector(nxpe, nyp, ny, MPI_FLOAT, &scatter[1]);
    MPI_Type_commit(&scatter[1]);

    /* scatter data to processes with different size arrays than ROOT in dim[0] and dim[1] */
    MPI_Type_vector(nxpe, nype, ny, MPI_FLOAT, &scatter[2]);
    MPI_Type_commit(&scatter[2]);

    /* Scatter the data from process 0 to the rest */
    if(ROOT == rank)
    {
        printf("Scattering image\n");
        scatter_data(globalImage, localBuf, ny, nxp, nyp, dims, rank, comm2d, scatter);
    }
    else
    {
        MPI_Recv(localBuf, nxp*nyp, MPI_FLOAT, 0, rank, comm2d, &status);
    }

    /*
     * set up the edge data to be used in the computation
     */
    for(i=0; i<nxp; ++i)
    {
        for(j=0; j<nyp; ++j)
        {
            localEdge[i+1][j+1] = localBuf[i][j];
            localOld[i+1][j+1] = 255;
        }
    }

    /*
     * computation loop
     */
    if(ROOT == rank)
    {
        printf("Performing update routine for %d iterations\n", ITERATIONS);
    }

    double t1, t2;
    t1 = MPI_Wtime();
    tag = 2;
    lastcheck = checkinc = iter = 0;
    delta = 1;

    while(iter < ITERATIONS)
    {
        send_halos(localOld, left, right, up, down, comm2d, tag, nxp, nyp, mrows, mcols);

        avg = 0;
        for(i=1; i<nxp+1; ++i)
        {
            for(j=1; j<nyp+1; ++j)
            {
                localNew[i][j] = 0.25*(localOld[i-1][j] + localOld[i+1][j]
                                     + localOld[i][j-1] + localOld[i][j+1]
                                     - localEdge[i][j]);
                avg = avg + localNew[i][j];
            }
        }

        max = 0;
        for(i=1; i<nxp+1; ++i)
        {
            for(j=1; j<nyp+1; ++j)
            {
                if(fabs(localNew[i][j] - localOld[i][j]) > max)
                {
                    max = fabs(localNew[i][j] - localOld[i][j]);
                }
                localOld[i][j] = localNew[i][j];
            }
        }

        /*
         * periodically calculate the average pixel value and delta
         */
        if(iter == lastcheck + checkinc)
        {
            lastcheck = iter;
            MPI_Reduce(&avg, &mean, 1, MPI_DOUBLE, MPI_SUM, ROOT, comm2d);
            MPI_Allreduce(&max, &delta, 1, MPI_DOUBLE, MPI_MAX, comm2d);
            if(ROOT == rank)
            {
                // printf("iteration %d, average pixel value is %f, current delta %f\n", iter, mean/(nx*ny), delta);
            }
            checkinc = (int)(delta*500);
            if(checkinc > 200) checkinc = 500;
        }

        ++iter;
        if(ITERATIONS == iter)
        {
            break;
        }
    }

    t2 = MPI_Wtime();

    if(ROOT == rank)
    {
        printf("finished after %d iterations, delta was %f\n", iter, delta);
        printf("seconds per iteration: %f\n", (t2-t1)/iter);
    }

    for(i=0; i<nxp; ++i)
    {
        for(j=0; j<nyp; ++j)
        {
            localBuf[i][j] = localOld[i+1][j+1];
        }
    }

    tag = 3;
    if(ROOT == rank)
    {
        printf("receiving back data\n");
        receive_data(globalImage, localBuf, ny, nxp, nyp, dims, tag, rank, comm2d, scatter);
    }
    else
    {
        MPI_Issend(localBuf, nxp*nyp, MPI_FLOAT, ROOT, tag, comm2d, &request);
        MPI_Wait(&request, &status);
    }

    if(ROOT == rank)
    {
        pgmwrite("parpictureCHANGETHIS.pgm", globalImage, nx, ny);
    }

    MPI_Finalize();

    return 0;
}
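/*
 * The update loop above relies on send_halos() to exchange halo rows and
 * columns between Cartesian neighbours before each sweep. The routine itself
 * is not shown in this section; the sketch below is only an illustration of
 * how such an exchange could be written with the mrows/mcols derived datatypes
 * and MPI_Sendrecv. The function name, parameter order, and the use of
 * MPI_Sendrecv are assumptions, not the original implementation.
 */
void send_halos_sketch(int nxp, int nyp, float old[nxp+2][nyp+2],
                       int left, int right, int up, int down,
                       MPI_Comm comm2d, int tag,
                       MPI_Datatype mrows, MPI_Datatype mcols)
{
    MPI_Status status;

    /* rows are contiguous (mcols): send first interior row up, receive bottom halo from down */
    MPI_Sendrecv(&old[1][1],     1, mcols, up,   tag,
                 &old[nxp+1][1], 1, mcols, down, tag, comm2d, &status);
    /* send last interior row down, receive top halo from up */
    MPI_Sendrecv(&old[nxp][1],   1, mcols, down, tag,
                 &old[0][1],     1, mcols, up,   tag, comm2d, &status);

    /* columns are strided (mrows): send first interior column left, receive right halo */
    MPI_Sendrecv(&old[1][1],     1, mrows, left,  tag,
                 &old[1][nyp+1], 1, mrows, right, tag, comm2d, &status);
    /* send last interior column right, receive left halo */
    MPI_Sendrecv(&old[1][nyp],   1, mrows, right, tag,
                 &old[1][0],     1, mrows, left,  tag, comm2d, &status);
}
/*
 * At the edges of the non-periodic grid MPI_Cart_shift returns MPI_PROC_NULL,
 * so the corresponding MPI_Sendrecv calls become no-ops automatically.
 */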
int main(int argc, char **argv)
{
    //declare the required data structures
    int N = 32;                 /* Matrix size */

    /* Matrices and vectors */
    float *A = malloc(MAXN * MAXN * sizeof(float));
    int i, j;

    //code commented out; was used for testing.
    /*
    float temp[64] = {1,2,3,4,5,6,7,8,
                      2,3,4,1,7,4,5,6,
                      2,3,2,1,2,2,1,1,
                      4,5,4,5,5,3,4,2,
                      1,4,8,4,3,7,6,6,
                      9,7,7,3,2,8,5,4,
                      8,6,4,1,1,5,3,3,
                      8,3,2,6,4,6,9,7};
    for(i=0; i<N; i++){
        for(j=0; j<N; j++)
        {
            *(A+((N*i)+j)) = temp[i*N+j];
            //printf(" %f", *(A+((8*i)+j)));
        }
        //printf("\n");
    }
    */

    float B[MAXN];  // = {5,6,7,3,5,2,9,5};
    float X[MAXN];  // = {0,0,0,0,0,0,0,0};

    int my_rank = 0;            /* My process rank */
    int p;                      /* The number of processes */

    //clock time recording variables
    double start_time, end_time = 0.0;

    ///////////////////MPI code starts////////////////////

    //status variable used to check the status of a communication operation.
    MPI_Status status;

    /* Let the system do what it needs to start up MPI */
    MPI_Init(&argc, &argv);

    /* Get my process rank */
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    /* Find out how many processes are being used */
    MPI_Comm_size(MPI_COMM_WORLD, &p);

    if(my_rank == 0)
    {
        /* Process program parameters */
        N = parameters(argc, argv);

        /* Initialize A and B */
        initialize_inputs(A, B, X, N);

        /* Print input matrices */
        print_inputs(A, B, N);

        //Start the clock and record the start time.
        start_time = MPI_Wtime();
    }

    //broadcast the size of the matrix read by process 0 to all processes.
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

    //We need all processes to wait here until all others arrive:
    //the input matrix must have been initialized by process 0 and
    //the matrix size propagated to all processes.
    MPI_Barrier(MPI_COMM_WORLD);

    //declare the local variables
    int local_no_of_rows;          //number of rows to be processed by each process
    int local_matrix_size;         //size of the matrix
    float local_norm_row[N];       //the current normalization row
    float local_matrix_A[N][N];    //the part of matrix A on which each process will work
    float local_matrix_B[N];       //the part of matrix B on which each process will work
    int rows_per_process[p];       //the number of rows distributed to each process
    float local_norm_B;            //the element against which B will be normalized
    int displ[p];                  //displacement vector
    int norm = 0;                  //the index of the current normalizing row

    //The outermost loop of the Gaussian elimination operation.
    for (norm = 0; norm < N - 1; norm++)
    {
        //Scatter the data across all processes.
        //This call scatters matrix A and broadcasts the current normalizing row
        //and the number of rows each process will work on.
        scatter_data(norm, my_rank, p, A, N, &local_no_of_rows, &local_matrix_size,
                     local_norm_row, &(local_matrix_A[0][0]), &rows_per_process[0]);

        //Calculate the send counts and displacement vector for the scatter of matrix B.
        if(my_rank == 0)
        {
            //printf(" %d", *(rows_per_process));
            displ[0] = 0;
            for(j=1; j<p; j++)
            {
                displ[j] = rows_per_process[j-1] + displ[j-1];
                //printf(" %d", *(rows_per_process+j));
            }
        }

        //Scatter matrix B. Different processes may have different numbers of
        //elements to work on when the matrix size is not evenly divisible by
        //the number of processes, hence MPI_Scatterv() instead of MPI_Scatter().
        MPI_Scatterv(B+norm+1, rows_per_process, displ, MPI_FLOAT,
                     local_matrix_B, local_no_of_rows, MPI_FLOAT, 0, MPI_COMM_WORLD);

        //broadcast the element against which matrix B will be normalized.
        local_norm_B = B[norm];
        MPI_Bcast(&local_norm_B, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);

        //each process performs the following elimination operation on its
        //share of matrices A and B.
        eliminate(local_matrix_size, local_no_of_rows, &local_norm_row[0],
                  &(local_matrix_A[0][0]), norm, &(local_matrix_B[0]), local_norm_B);

        //We need to calculate the counts and displacements for the gather operation
        //of the processed matrix A after each iteration.
        int counts_for_gather[p];
        int displacements_for_gather[p];
        if(my_rank == 0)
        {
            displacements_for_gather[0] = 0;
            counts_for_gather[0] = rows_per_process[0]*local_matrix_size;
            for(j=1; j<p; j++)
            {
                counts_for_gather[j] = rows_per_process[j]*local_matrix_size;
                displacements_for_gather[j] = counts_for_gather[j-1] + displacements_for_gather[j-1];
            }
        }

        //Gather the processed matrix A from all processes and store it on process 0.
        MPI_Gatherv(local_matrix_A, local_no_of_rows*local_matrix_size, MPI_FLOAT,
                    A+(N*(norm+1)), counts_for_gather, displacements_for_gather,
                    MPI_FLOAT, 0, MPI_COMM_WORLD);

        //Similarly, gather the processed matrix B.
        MPI_Gatherv(local_matrix_B, local_no_of_rows, MPI_FLOAT,
                    B+norm+1, rows_per_process, displ, MPI_FLOAT, 0, MPI_COMM_WORLD);
    }

    //We need to wait for all processes to complete before we go ahead with
    //back substitution.
    MPI_Barrier(MPI_COMM_WORLD);

    //perform the back substitution operation only on process 0.
    int row, col;
    if(my_rank == 0)
    {
        /* Back substitution */
        for (row = N - 1; row >= 0; row--)
        {
            X[row] = B[row];
            for (col = N-1; col > row; col--)
            {
                X[row] -= *(A+(N*row)+col) * X[col];
            }
            X[row] /= *(A+(N*row)+col);
        }

        //Stop the clock as the operation is finished.
        end_time = MPI_Wtime();

        //display X if the matrix size is small.
        if (N < 100)
        {
            printf("\nX = [");
            for (row = 0; row < N; row++)
            {
                printf("%5.2f%s", X[row], (row < N-1) ? "; " : "]\n");
            }
        }

        //print the execution time for performance analysis purposes.
        printf("\n\nThe total execution time as recorded on process 0 = %f seconds\n",
               end_time - start_time);
    }

    free(A);
    MPI_Finalize();

    return 0;
}
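/*
 * eliminate() is called in the loop above but not defined in this section.
 * The sketch below shows one plausible implementation of the per-process
 * elimination step, following the argument order used at the call site
 * (row width of the local block, local row count, normalizing row, local
 * block of A stored row-major, pivot index, local block of B, and the pivot
 * element of B). The name and body are assumptions, not the original code.
 */
void eliminate_sketch(int n, int local_rows, float *norm_row, float *local_A,
                      int norm, float *local_B, float norm_B)
{
    int row, col;
    for (row = 0; row < local_rows; row++)
    {
        /* multiplier that zeroes the pivot column of this local row */
        float multiplier = local_A[row*n + norm] / norm_row[norm];
        for (col = norm; col < n; col++)
        {
            local_A[row*n + col] -= multiplier * norm_row[col];
        }
        local_B[row] -= multiplier * norm_B;
    }
}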