/*
 * Serial driver fragment: initialises the NXPROB x NYPROB grid and writes
 * it to "initial.dat".
 *
 * Fixes vs. original:
 *  - the grid array `u` was passed to inidat()/prtdat() but never declared,
 *    which is a hard compile error; it is declared here.
 *  - the long list of never-used MPI bookkeeping locals was removed.
 */
int main(int argc, char *argv[])
{
    void inidat(), prtdat(), update();   /* K&R-style forward declarations, kept as in the file */
    float u[2][NXPROB][NYPROB];          /* grid storage read/written by inidat/prtdat */

    (void)argc;                          /* command-line arguments unused in this variant */
    (void)argv;

    inidat(NXPROB, NYPROB, u);
    prtdat(NXPROB, NYPROB, u, "initial.dat");
    return 0;
}
int main(int argc, char *argv[]) { void inidat(), update_outside_table(), update_inside_table(); float *table_u; /* array for grid */ int taskid, /* this task's unique id */ rc, /* MPI Abord */ numworkers, /* number of worker processes */ numtasks, /* number of tasks */ offset, /* for sending rows of data */ ix, iy, iz, it, /* loop variables */ sub_table_dimention, /* Inner sub table dimention for task's grids */ sub_x, sub_y, /* Sub table dimention for task's grids with extra 2 levels perimetrical */ nbrs[4], size, /* Size of idim specifying the number of processes in each dimension */ dims[2], /* Array of size ndims */ reorder = 1, /* Ranking may be reordered (true) or not (false) (logical) */ periods[2] = {0, 0}; /* Logical array of size ndims specifying whether the grid is periodic */ double start_time = 0, /* start time */ end_time = 0, /* end time */ process_clock = 0, /* process's duration */ master_clock = 0; /* master's duration */ /* First, find out my taskid and how many tasks are running */ MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numtasks); MPI_Comm_rank(MPI_COMM_WORLD, &taskid); /* Initialization */ numworkers = numtasks; if (NXPROB * NYPROB % numworkers != 0) { printf("Main ERROR: Number of tasks must be: %dx%d %% %d != 0 .\n", NXPROB, NYPROB, numworkers); return(EXIT_FAILURE); } /* Create Cartesian Topology */ MPI_Comm cartcomm; size = sqrt(numworkers); dims[0] = size; dims[1] = size; MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, reorder, &cartcomm); /* Blocks the caller until all group members have called it */ MPI_Barrier(cartcomm); /* Returns the shifted source and destination ranks, given a shift direction and amount */ MPI_Cart_shift(cartcomm, 0, 1, &nbrs[UP], &nbrs[DOWN]); MPI_Cart_shift(cartcomm, 1, 1, &nbrs[LEFT], &nbrs[RIGHT]); sub_table_dimention = sqrt(NXPROB * NYPROB / numtasks); sub_x = sub_table_dimention + 2; sub_y = sub_table_dimention + 2; /* Allocate memory dynamically */ table_u = (float*) malloc((2 * sub_x * 
sub_y) * sizeof(float)); if (table_u == NULL) { printf("Main ERROR: Allocation memory.\n"); MPI_Abort(cartcomm, rc); return(EXIT_FAILURE); } /* Initialize grid tables with Zero */ for (iz = 0; iz < 2; iz++) { for (ix = 0; ix < sub_x; ix++) { for (iy = 0; iy < sub_y; iy++) { offset = iz * sub_x * sub_y + ix * sub_y + iy; *(table_u + offset) = 0.0; } } } /* Initialize Table */ inidat(sub_table_dimention, sub_table_dimention, sub_y, (table_u + sub_y + 1)); /* Datatype Definition */ MPI_Datatype COL_INT; MPI_Type_vector(sub_table_dimention, 1, sub_y, MPI_FLOAT, &COL_INT); MPI_Type_commit(&COL_INT); iz = 0; // initialize iz to first table MPI_Request req[8]; MPI_Status status[8]; /* Start Timer */ start_time = MPI_Wtime(); for (it = 1; it <= STEPS; it++){ /* Send and Receive asynchronous the shared values of neighbor */ if (nbrs[UP] >= 0){ MPI_Isend(table_u + iz*sub_x*sub_y + sub_y + 1, sub_table_dimention, MPI_FLOAT, nbrs[UP], DTAG, cartcomm, &req[0]); MPI_Irecv(table_u + iz*sub_x*sub_y + 1, sub_table_dimention, MPI_FLOAT, nbrs[UP], UTAG, cartcomm, &req[1]); } if (nbrs[DOWN] >= 0){ MPI_Isend(table_u + iz*sub_x*sub_y + sub_table_dimention*sub_y + 1, sub_table_dimention , MPI_FLOAT, nbrs[DOWN], UTAG, cartcomm, &req[2]); MPI_Irecv(table_u + iz*sub_x*sub_y + (sub_table_dimention+1)*sub_y + 1, sub_table_dimention , MPI_FLOAT, nbrs[DOWN], DTAG, cartcomm, &req[3]); } if (nbrs[LEFT] >= 0){ MPI_Isend(table_u + iz*sub_x*sub_y + sub_y + 1, 1, COL_INT, nbrs[LEFT], RTAG, cartcomm,&req[4]); MPI_Irecv(table_u + iz*sub_x*sub_y + sub_y, 1, COL_INT, nbrs[LEFT], LTAG, cartcomm, &req[5]); } if (nbrs[RIGHT] >= 0 ){ MPI_Isend(table_u + iz*sub_x*sub_y + sub_y + sub_table_dimention, 1, COL_INT, nbrs[RIGHT], LTAG, cartcomm,&req[6]); MPI_Irecv(table_u + iz*sub_x*sub_y + sub_y + sub_table_dimention + 1, 1, COL_INT , nbrs[RIGHT], RTAG, cartcomm,&req[7]); } /* Update inside table while the process wait for neighbor values */ update_inside_table(sub_table_dimention - 2, table_u + iz*sub_x*sub_y, 
table_u + (1-iz)*sub_x*sub_y); /* Wait for neighbor values */ if(nbrs[UP] >= 0){ MPI_Wait(&req[0],&status[0]); MPI_Wait(&req[1],&status[1]); } if(nbrs[DOWN] >= 0){ MPI_Wait(&req[2],&status[2]); MPI_Wait(&req[3],&status[3]); } if(nbrs[LEFT] >= 0){ MPI_Wait(&req[4],&status[4]); MPI_Wait(&req[5],&status[5]); } if(nbrs[RIGHT] >= 0){ MPI_Wait(&req[6],&status[6]); MPI_Wait(&req[7],&status[7]); } /* Update outside table with neighboor values */ update_outside_table(sub_table_dimention, table_u + iz*sub_x*sub_y, table_u + (1-iz)*sub_x*sub_y); /* Next loop with have to deal with the other table */ iz = 1 - iz; } /* Stop Timer */ end_time = MPI_Wtime(); process_clock = end_time - start_time; MPI_Reduce(&process_clock, &master_clock, 1, MPI_DOUBLE, MPI_MAX, 0, cartcomm); MPI_Barrier(cartcomm); /* Print out time elapsed */ if (taskid == MASTER){ printf("Total time elapsed for:\n\tTable [%d]x[%d] = %lf \n", NXPROB, NYPROB, master_clock); } /* Free resources */ free(table_u); MPI_Type_free(&COL_INT); MPI_Finalize(); }
int main (int argc, char *argv[]) { void inidat(), prtdat(), update(); float u[2][NXPROB][NYPROB]; /* array for grid */ int taskid, /* this task's unique id */ numworkers, /* number of worker processes */ numtasks, /* number of tasks */ averow,rows,offset,extra, /* for sending rows of data */ dest, source, /* to - from for message send-receive */ left,right, /* neighbor tasks */ msgtype, /* for message types */ rc,start,end, /* misc */ i,ix,iy,iz,it; /* loop variables */ MPI_Status status; /* First, find out my taskid and how many tasks are running */ MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&numtasks); MPI_Comm_rank(MPI_COMM_WORLD,&taskid); numworkers = numtasks-1; if (taskid == MASTER) { /************************* master code *******************************/ /* Check if numworkers is within range - quit if not */ if ((numworkers > MAXWORKER) || (numworkers < MINWORKER)) { printf("ERROR: the number of tasks must be between %d and %d.\n", MINWORKER+1,MAXWORKER+1); printf("Quitting...\n"); MPI_Abort(MPI_COMM_WORLD, rc); exit(1); } printf ("Starting mpi_heat2D with %d worker tasks.\n", numworkers); /* Initialize grid */ printf("Grid size: X= %d Y= %d Time steps= %d\n",NXPROB,NYPROB,STEPS); printf("Initializing grid and writing initial.dat file...\n"); inidat(NXPROB, NYPROB, u); prtdat(NXPROB, NYPROB, u, "initial.dat"); /* Distribute work to workers. Must first figure out how many rows to */ /* send and what to do with extra rows. */ averow = NXPROB/numworkers; extra = NXPROB%numworkers; offset = 0; for (i=1; i<=numworkers; i++) { rows = (i <= extra) ? averow+1 : averow; /* Tell each worker who its neighbors are, since they must exchange */ /* data with each other. 
*/ if (i == 1) left = NONE; else left = i - 1; if (i == numworkers) right = NONE; else right = i + 1; /* Now send startup information to each worker */ dest = i; MPI_Send(&offset, 1, MPI_INT, dest, BEGIN, MPI_COMM_WORLD); MPI_Send(&rows, 1, MPI_INT, dest, BEGIN, MPI_COMM_WORLD); MPI_Send(&left, 1, MPI_INT, dest, BEGIN, MPI_COMM_WORLD); MPI_Send(&right, 1, MPI_INT, dest, BEGIN, MPI_COMM_WORLD); MPI_Send(&u[0][offset][0], rows*NYPROB, MPI_FLOAT, dest, BEGIN, MPI_COMM_WORLD); printf("Sent to task %d: rows= %d offset= %d ",dest,rows,offset); printf("left= %d right= %d\n",left,right); offset = offset + rows; } /* Now wait for results from all worker tasks */ for (i=1; i<=numworkers; i++) { source = i; msgtype = DONE; MPI_Recv(&offset, 1, MPI_INT, source, msgtype, MPI_COMM_WORLD, &status); MPI_Recv(&rows, 1, MPI_INT, source, msgtype, MPI_COMM_WORLD, &status); MPI_Recv(&u[0][offset][0], rows*NYPROB, MPI_FLOAT, source, msgtype, MPI_COMM_WORLD, &status); } /* Write final output, call X graph and finalize MPI */ printf("Writing final.dat file and generating graph...\n"); prtdat(NXPROB, NYPROB, &u[0][0][0], "final.dat"); printf("Click on MORE button to view initial/final states.\n"); printf("Click on EXIT button to quit program.\n"); //draw_heat(NXPROB,NYPROB); MPI_Finalize(); } /* End of master code */ /************************* workers code **********************************/ if (taskid != MASTER) { /* Initialize everything - including the borders - to zero */ for (iz=0; iz<2; iz++) for (ix=0; ix<NXPROB; ix++) for (iy=0; iy<NYPROB; iy++) u[iz][ix][iy] = 0.0; /* Receive my offset, rows, neighbors and grid partition from master */ source = MASTER; msgtype = BEGIN; MPI_Recv(&offset, 1, MPI_INT, source, msgtype, MPI_COMM_WORLD, &status); MPI_Recv(&rows, 1, MPI_INT, source, msgtype, MPI_COMM_WORLD, &status); MPI_Recv(&left, 1, MPI_INT, source, msgtype, MPI_COMM_WORLD, &status); MPI_Recv(&right, 1, MPI_INT, source, msgtype, MPI_COMM_WORLD, &status); MPI_Recv(&u[0][offset][0], 
rows*NYPROB, MPI_FLOAT, source, msgtype, MPI_COMM_WORLD, &status); /* Determine border elements. Need to consider first and last columns. */ /* Obviously, row 0 can't exchange with row 0-1. Likewise, the last */ /* row can't exchange with last+1. */ start=offset; end=offset+rows-1; if (offset==0) start=1; if ((offset+rows)==NXPROB) end--; printf("task=%d start=%d end=%d\n",taskid,start,end); /* Begin doing STEPS iterations. Must communicate border rows with */ /* neighbors. If I have the first or last grid row, then I only need */ /* to communicate with one neighbor */ printf("Task %d received work. Beginning time steps...\n",taskid); iz = 0; for (it = 1; it <= STEPS; it++) { if (left != NONE) { MPI_Send(&u[iz][offset][0], NYPROB, MPI_FLOAT, left, RTAG, MPI_COMM_WORLD); source = left; msgtype = LTAG; MPI_Recv(&u[iz][offset-1][0], NYPROB, MPI_FLOAT, source, msgtype, MPI_COMM_WORLD, &status); } if (right != NONE) { MPI_Send(&u[iz][offset+rows-1][0], NYPROB, MPI_FLOAT, right, LTAG, MPI_COMM_WORLD); source = right; msgtype = RTAG; MPI_Recv(&u[iz][offset+rows][0], NYPROB, MPI_FLOAT, source, msgtype, MPI_COMM_WORLD, &status); } /* Now call update to update the value of grid points */ update(start,end,NYPROB,&u[iz][0][0],&u[1-iz][0][0]); iz = 1 - iz; } /* Finally, send my portion of final results back to master */ MPI_Send(&offset, 1, MPI_INT, MASTER, DONE, MPI_COMM_WORLD); MPI_Send(&rows, 1, MPI_INT, MASTER, DONE, MPI_COMM_WORLD); MPI_Send(&u[iz][offset][0], rows*NYPROB, MPI_FLOAT, MASTER, DONE, MPI_COMM_WORLD); MPI_Finalize(); } }
int main (int argc, char *argv[]) { void inidat(); float ***array; /* array for grid */ int taskid, /* this task's unique id */ numtasks, /* number of tasks */ averow,rows,offset,extra, /* for sending rows of data */ dest, source, /* to - from for message send-receive */ left,right, /* neighbor tasks */ msgtype, /* for message types */ rc,start,end, /* misc */ i,x,y,z,it,size,t_sqrt; /* loop variables */ MPI_Status status; MPI_Datatype dt,dt2; MPI_Request req, req2,req3,req4,req5; double t1,t2; /* First, find out my taskid and how many tasks are running */ MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&numtasks); MPI_Comm_rank(MPI_COMM_WORLD,&taskid); /*Set number of threads */ omp_set_num_threads(atoi(argv[1])); // Use n threads for all consecutive parallel regions omp_set_nested(1); if (taskid == 0) { //printf("Grid size: X= %d Y= %d Time steps= %d\n",NXPROB,NYPROB,STEPS); t1 = MPI_Wtime(); } i = 0; while(i*i < (NXPROB*NYPROB)/numtasks) i++; size = i; i = 0; while(i*i<numtasks) i++; t_sqrt = i; MPI_Type_contiguous(size+2,MPI_FLOAT, &dt); MPI_Type_commit(&dt); MPI_Type_vector(size+2,1,size+2,MPI_FLOAT,&dt2); MPI_Type_commit(&dt2); array = malloc(2*sizeof(float**)); for (i = 0;i<2;i++){ array[i] = malloc((2+size)*sizeof(float*)); array[i][0] = malloc(((2+size)*(2+size))*sizeof(float)); for (x = 1;x<2+size;x++){ array[i][x] = &(array[i][0][x*(2+size)]); } } for (z=0; z<2; z++){ for (x=0; x<2+size; x++){ for (y=0; y<2+size; y++){ array[z][x][y] = 0.0; } } } z = 0; inidat(NXPROB,NYPROB,array[z],size*(taskid/t_sqrt),size*(taskid%t_sqrt),size); for (i = 1; i <= STEPS; i++) { if (taskid/t_sqrt != 0) //not first row { MPI_Isend(array[z][1],1,dt,taskid-t_sqrt,100, MPI_COMM_WORLD, &req); MPI_Irecv(array[z][0],1,dt,taskid-t_sqrt,100, MPI_COMM_WORLD, &req2); } if (taskid/t_sqrt != t_sqrt-1) //not last row { MPI_Isend(array[z][size],1,dt,taskid+t_sqrt,100, MPI_COMM_WORLD, &req); MPI_Irecv(array[z][size+1],1,dt,taskid+t_sqrt,100, MPI_COMM_WORLD, &req3); } if(taskid%t_sqrt 
!= 0) //not last column { MPI_Isend(&array[z][0][1],1,dt2,taskid-1,100, MPI_COMM_WORLD, &req); MPI_Irecv(&array[z][0][0],1,dt2,taskid-1,100, MPI_COMM_WORLD, &req4); } if(taskid%t_sqrt != t_sqrt-1) //not last column { MPI_Isend(&array[z][0][size],1,dt2,taskid+1,100, MPI_COMM_WORLD, &req); MPI_Irecv(&array[z][0][size+1],1,dt2,taskid+1,100, MPI_COMM_WORLD, &req5); } inner_update(size,array[z],array[1-z]); if (taskid/t_sqrt != 0) MPI_Wait(&req2,&status); if (taskid/t_sqrt != t_sqrt-1) MPI_Wait(&req3,&status); if(taskid%t_sqrt != 0) MPI_Wait(&req4,&status); if(taskid%t_sqrt != t_sqrt-1) MPI_Wait(&req5,&status); outer_update(size,taskid,t_sqrt,array[z],array[1-z]); z = 1-z; } if (taskid == 0){ t2 = MPI_Wtime(); printf("MPI_Wtime measured: %1.2f\n", t2-t1); } for (i = 0;i<2;i++){ free(array[i][0]); free(array[i]); } free(array); MPI_Type_free(&dt); MPI_Type_free(&dt2); MPI_Finalize(); }