/*
** Run one simulation timestep as three stages, in order:
**   1. accelerate_flow(params, cells, obstacles)
**   2. propagate(params, cells, tmp_cells)
**   3. rebound_or_collision(params, cells, tmp_cells, obstacles)
**
** Anything the stages return is ignored; this function always returns
** EXIT_SUCCESS.
**
** NOTE(review): this function uses the t_param/t_speed type names and a
** three-argument accelerate_flow(), whereas main() in this file uses
** param_t/speed_t and calls accelerate_flow() with six arguments -- confirm
** that this function is still part of the build.
*/
int timestep(const t_param params, t_speed* cells, t_speed* tmp_cells, int* obstacles)
{
  accelerate_flow(params, cells, obstacles);
  propagate(params, cells, tmp_cells);
  rebound_or_collision(params, cells, tmp_cells, obstacles);

  return EXIT_SUCCESS;
}
int main(int argc, char* argv[]) { param_t params; /* struct to hold parameter values */ speed_t* cells = NULL; /* grid containing fluid densities */ speed_t* tmp_cells = NULL; /* scratch space */ int* obstacles = NULL; /* grid indicating which cells are blocked */ int* rowsSetup = NULL; int* accelgrid = NULL; float* av_vels = NULL; /* a record of the av. velocity computed for each timestep */ int ii, rank, size, tag=0, jj; /* generic counter */ struct timeval timstr; /* structure to hold elapsed time */ struct rusage ru; /* structure to hold CPU time--system and user */ double tic,toc; /* floating point numbers to calculate elapsed wallclock time */ double usrtim; /* floating point number to record elapsed user CPU time */ double systim; /* floating point number to record elapsed system CPU time */ int halorow; int buff; int extra; int start_row; int end_row; MPI_Status status; accel_area_t accel_area; /* initialise our data structures and load values from file */ initialise(argv[1], &accel_area, ¶ms, &cells, &tmp_cells, &obstacles, &av_vels, &accelgrid); // Initialize MPI environment. MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); extra = params.ny%size; halorow = (rank<extra) ? 
(params.ny/size + 1) * params.nx : (params.ny/size) * params.nx; calc_row_setup(&rowsSetup, rank, halorow, extra, params); buff = rowsSetup[0]; start_row = rowsSetup[1]; end_row = rowsSetup[2]; /* iterate for max_iters timesteps */ gettimeofday(&timstr,NULL); tic=timstr.tv_sec+(timstr.tv_usec/1000000.0); for(ii=0; ii<params.max_iters; ii++) { accelerate_flow(params,accel_area,cells,start_row, end_row, accelgrid); lattice(params,cells,tmp_cells,obstacles, av_vels, start_row, end_row, ii); } gettimeofday(&timstr,NULL); toc=timstr.tv_sec+(timstr.tv_usec/1000000.0); getrusage(RUSAGE_SELF, &ru); timstr=ru.ru_utime; usrtim=timstr.tv_sec+(timstr.tv_usec/1000000.0); timstr=ru.ru_stime; systim=timstr.tv_sec+(timstr.tv_usec/1000000.0); float* buffer = malloc(buff * 9 * sizeof(float)); if(rank != 0) { for(ii=0; ii<halorow; ii++) { buffer[9*ii] = cells[start_row+ii].speeds[0]; buffer[9*ii+1] = cells[start_row+ii].speeds[1]; buffer[9*ii+2] = cells[start_row+ii].speeds[2]; buffer[9*ii+3] = cells[start_row+ii].speeds[3]; buffer[9*ii+4] = cells[start_row+ii].speeds[4]; buffer[9*ii+5] = cells[start_row+ii].speeds[5]; buffer[9*ii+6] = cells[start_row+ii].speeds[6]; buffer[9*ii+7] = cells[start_row+ii].speeds[7]; buffer[9*ii+8] = cells[start_row+ii].speeds[8]; } MPI_Send(buffer, 9*buff, MPI_FLOAT, 0, tag, MPI_COMM_WORLD); } else { if(extra == 0) { for(ii=1; ii<size; ii++) { MPI_Recv(buffer, 9*buff, MPI_FLOAT, ii, tag, MPI_COMM_WORLD, &status); for(jj=0; jj<halorow; jj++) { cells[halorow*ii+jj].speeds[0] = buffer[9*jj]; cells[halorow*ii+jj].speeds[1] = buffer[9*jj+1]; cells[halorow*ii+jj].speeds[2] = buffer[9*jj+2]; cells[halorow*ii+jj].speeds[3] = buffer[9*jj+3]; cells[halorow*ii+jj].speeds[4] = buffer[9*jj+4]; cells[halorow*ii+jj].speeds[5] = buffer[9*jj+5]; cells[halorow*ii+jj].speeds[6] = buffer[9*jj+6]; cells[halorow*ii+jj].speeds[7] = buffer[9*jj+7]; cells[halorow*ii+jj].speeds[8] = buffer[9*jj+8]; } } } else { for(ii=1; ii<extra; ii++) { MPI_Recv(buffer, 9*buff, MPI_FLOAT, ii, 
tag, MPI_COMM_WORLD, &status); for(jj=0; jj<halorow; jj++) { cells[halorow*ii+jj].speeds[0] = buffer[9*jj]; cells[halorow*ii+jj].speeds[1] = buffer[9*jj+1]; cells[halorow*ii+jj].speeds[2] = buffer[9*jj+2]; cells[halorow*ii+jj].speeds[3] = buffer[9*jj+3]; cells[halorow*ii+jj].speeds[4] = buffer[9*jj+4]; cells[halorow*ii+jj].speeds[5] = buffer[9*jj+5]; cells[halorow*ii+jj].speeds[6] = buffer[9*jj+6]; cells[halorow*ii+jj].speeds[7] = buffer[9*jj+7]; cells[halorow*ii+jj].speeds[8] = buffer[9*jj+8]; } } for(ii=extra; ii<size; ii++) { MPI_Recv(buffer, 9*buff, MPI_FLOAT, ii, tag, MPI_COMM_WORLD, &status); for(jj=0; jj<(params.ny/size) * params.nx; jj++) { int local_extra = halorow * extra + (halorow-params.nx)* (ii-extra); cells[local_extra + jj].speeds[0] = buffer[9*jj]; cells[local_extra + jj].speeds[1] = buffer[9*jj+1]; cells[local_extra + jj].speeds[2] = buffer[9*jj+2]; cells[local_extra + jj].speeds[3] = buffer[9*jj+3]; cells[local_extra + jj].speeds[4] = buffer[9*jj+4]; cells[local_extra + jj].speeds[5] = buffer[9*jj+5]; cells[local_extra + jj].speeds[6] = buffer[9*jj+6]; cells[local_extra + jj].speeds[7] = buffer[9*jj+7]; cells[local_extra + jj].speeds[8] = buffer[9*jj+8]; } } } free(buffer); buffer = NULL; } MPI_Finalize(); /*Finilize MPI*/ if(rank == 0) { printf("==done==\n"); printf("Reynolds number:\t\t%.12E\n",calc_reynolds(params,cells,obstacles,av_vels[params.max_iters-1])); printf("Elapsed time:\t\t\t%.6lf (s)\n", toc-tic); printf("Elapsed user CPU time:\t\t%.6lf (s)\n", usrtim); printf("Elapsed system CPU time:\t%.6lf (s)\n", systim); write_values(params,cells,obstacles,av_vels); finalise(¶ms, &cells, &tmp_cells, &obstacles, &av_vels, &accelgrid, &rowsSetup); } return EXIT_SUCCESS; }