int main(int argc, char **argv) { int nb_th=1; double dt = 0; long nvtk = 0; char outnum[80]; long time_output = 0; // double output_time = 0.0; double next_output_time = 0; double start_time = 0, end_time = 0; double start_iter = 0, end_iter = 0; double elaps = 0; start_time = cclock(); process_args(argc, argv, &H); hydro_init(&H, &Hv); PRINTUOLD(H, &Hv); printf("Hydro starts - sequential version \n"); // vtkfile(nvtk, H, &Hv); if (H.dtoutput > 0) { // outputs are in physical time not in time steps time_output = 1; next_output_time = next_output_time + H.dtoutput; } while ((H.t < H.tend) && (H.nstep < H.nstepmax)) { start_iter = cclock(); outnum[0] = 0; flops = 0; if ((H.nstep % 2) == 0) { compute_deltat(&dt, H, &Hw, &Hv, &Hvw); if (H.nstep == 0) { dt = dt / 2.0; } } if ((H.nstep % 2) == 0) { hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw); hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw); } else { hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw); hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw); } end_iter = cclock(); H.nstep++; H.t += dt; if (flops > 0) { double iter_time = (double) (end_iter - start_iter); if (iter_time > 1.e-9) { double mflops = (double) flops / (double) 1.e+6 / iter_time; sprintf(outnum, "%s {%.3f Mflops} (%.3fs)", outnum, mflops, iter_time); } } else { double iter_time = (double) (end_iter - start_iter); sprintf(outnum, "%s (%.3fs)", outnum, iter_time); } if (time_output == 0) { if ((H.nstep % H.noutput) == 0) { vtkfile(++nvtk, H, &Hv); sprintf(outnum, "%s [%04ld]", outnum, nvtk); } } else { if (H.t >= next_output_time) { vtkfile(++nvtk, H, &Hv); next_output_time = next_output_time + H.dtoutput; sprintf(outnum, "%s [%04ld]", outnum, nvtk); } } fprintf(stdout, "--> step=%-4ld %12.5e, %10.5e %s\n", H.nstep, H.t, dt, outnum); } // end while loop hydro_finish(H, &Hv); end_time = cclock(); elaps = (double) (end_time - start_time); timeToString(outnum, elaps); fprintf(stdout, "Hydro ends in %ss (%.3lf).\n", outnum, elaps); return 0; }
int main(int argc, char **argv) { double dt = 0; long nvtk = 0; char outnum[240]; long time_output = 0; long flops = 0; // double output_time = 0.0; double next_output_time = 0; double start_time = 0, end_time = 0; double start_iter = 0, end_iter = 0; double elaps = 0; double avgMcps = 0; long nAvgMcps = 0; #ifdef MPI MPI_Init(&argc, &argv); DeviceSet(); #endif if (H.mype == 1) fprintf(stdout, "Hydro starts.\n"); process_args(argc, argv, &H); hydro_init(&H, &Hv); // PRINTUOLD(H, &Hv); cuAllocOnDevice(H); // Allocate work space for 1D sweeps allocate_work_space(H, &Hw, &Hvw); // vtkfile(nvtk, H, &Hv); if (H.dtoutput > 0) { // outputs are in physical time not in time steps time_output = 1; next_output_time = next_output_time + H.dtoutput; } if (H.dtoutput > 0 || H.noutput > 0) vtkfile(++nvtk, H, &Hv); if (H.mype == 0) fprintf(stdout, "Hydro starts main loop.\n"); cuPutUoldOnDevice(H, &Hv); start_time = cclock(); // fprintf(stdout, "%lg %lg %d %d \n", H.t, H.tend, H.nstep, H.nstepmax); while ((H.t < H.tend) && (H.nstep < H.nstepmax)) { double iter_time = 0; flopsAri = flopsSqr = flopsMin = flopsTra = 0; start_iter = cclock(); outnum[0] = 0; flops = 0; if ((H.nstep % 2) == 0) { cuComputeDeltat(&dt, H, &Hw, &Hv, &Hvw); // fprintf(stdout, "dt=%lg\n", dt); if (H.nstep == 0) { dt = dt / 2.0; } if (H.nproc > 1) { #ifdef MPI double dtmin; int uno = 1; MPI_Allreduce(&dt, &dtmin, uno, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); dt = dtmin; #endif } } if ((H.nstep % 2) == 0) { cuHydroGodunov(1, dt, H, &Hv, &Hw, &Hvw); } else { cuHydroGodunov(2, dt, H, &Hv, &Hw, &Hvw); } end_iter = cclock(); iter_time = (double) (end_iter - start_iter); H.nstep++; H.t += dt; { double iter_time = (double) (end_iter - start_iter); #ifdef MPI long flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t; MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); // if (H.mype == 1) // printf("%ld %ld %ld %ld %ld %ld %ld %ld \n", flopsAri, flopsSqr, flopsMin, flopsTra, flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t); flops = flopsAri_t * FLOPSARI + flopsSqr_t * FLOPSSQR + flopsMin_t * FLOPSMIN + flopsTra_t * FLOPSTRA; #else flops = flopsAri * FLOPSARI + flopsSqr * FLOPSSQR + flopsMin * FLOPSMIN + flopsTra * FLOPSTRA; #endif nbFLOPS++; if (flops > 0) { if (iter_time > 1.e-9) { double mflops = (double) flops / (double) 1.e+6 / iter_time; MflopsSUM += mflops; sprintf(outnum, "%s {%.2f Mflops %ld Ops} (%.3fs)", outnum, mflops, flops, iter_time); } } else { sprintf(outnum, "%s (%.3fs)", outnum, iter_time); } } if (iter_time > 1.e-9) { double mcps = ((double) H.globnx * (double) H.globny) / iter_time / 1e6l; if (H.nstep > 5) { sprintf(outnum, "%s (%.1lf MC/s)", outnum, mcps); nAvgMcps++; avgMcps += mcps; } } if (time_output == 0 && H.noutput > 0) { if ((H.nstep % H.noutput) == 0) { cuGetUoldFromDevice(H, &Hv); vtkfile(++nvtk, H, &Hv); sprintf(outnum, "%s [%04ld]", outnum, nvtk); } } else { if (time_output == 1 && H.t >= next_output_time) { cuGetUoldFromDevice(H, &Hv); vtkfile(++nvtk, H, &Hv); next_output_time = next_output_time + H.dtoutput; sprintf(outnum, "%s [%04ld]", outnum, nvtk); } } if (H.mype == 0) { fprintf(stdout, "--> step=%-4ld %12.5e, %10.5e %s\n", H.nstep, H.t, dt, outnum); fflush(stdout); } } end_time = cclock(); hydro_finish(H, &Hv); cuFreeOnDevice(); // Deallocate work space deallocate_work_space(H, &Hw, &Hvw); elaps = (double) (end_time - start_time); timeToString(outnum, elaps); if (H.mype == 0) fprintf(stdout, "Hydro ends in %ss (%.3lf) <%.2lf MFlops>.\n", outnum, elaps, (float) (MflopsSUM / nbFLOPS)); if (H.mype == 0) { avgMcps /= nAvgMcps; fprintf(stdout, "Average MC/s: %.1lf\n", avgMcps); } #ifdef MPI MPI_Finalize(); #endif return 0; }
int main(int argc, char **argv) { real_t dt = 0; int nvtk = 0; char outnum[80]; int time_output = 0; long flops = 0; // real_t output_time = 0.0; real_t next_output_time = 0; double start_time = 0, end_time = 0; double start_iter = 0, end_iter = 0; double elaps = 0; struct timespec start, end; // array of timers to profile the code memset(functim, 0, TIM_END * sizeof(functim[0])); #ifdef MPI MPI_Init(&argc, &argv); #endif process_args(argc, argv, &H); hydro_init(&H, &Hv); if (H.mype == 0) fprintf(stdout, "Hydro starts in %s precision.\n", ((sizeof(real_t) == sizeof(double))? "double": "single")); #ifdef _OPENMP if (H.mype == 0) { fprintf(stdout, "Hydro: OpenMP mode ON\n"); fprintf(stdout, "Hydro: OpenMP %d max threads\n", omp_get_max_threads()); fprintf(stdout, "Hydro: OpenMP %d num threads\n", omp_get_num_threads()); fprintf(stdout, "Hydro: OpenMP %d num procs\n", omp_get_num_procs()); } #endif #ifdef MPI if (H.mype == 0) { fprintf(stdout, "Hydro: MPI run with %d procs\n", H.nproc); } #else fprintf(stdout, "Hydro: standard build\n"); #endif // PRINTUOLD(H, &Hv); #ifdef MPI if (H.nproc > 1) MPI_Barrier(MPI_COMM_WORLD); #endif if (H.dtoutput > 0) { // outputs are in physical time not in time steps time_output = 1; next_output_time = next_output_time + H.dtoutput; } if (H.dtoutput > 0 || H.noutput > 0) vtkfile(++nvtk, H, &Hv); if (H.mype == 0) fprintf(stdout, "Hydro starts main loop.\n"); //pre-allocate memory before entering in loop //For godunov scheme start = cclock(); start = cclock(); allocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov); compute_deltat_init_mem(H, &Hw_deltat, &Hvw_deltat); end = cclock(); if (H.mype == 0) fprintf(stdout, "Hydro: init mem %lfs\n", ccelaps(start, end)); // we start timings here to avoid the cost of initial memory allocation start_time = dcclock(); while ((H.t < H.tend) && (H.nstep < H.nstepmax)) { // reset perf counter for this iteration flopsAri = flopsSqr = flopsMin = flopsTra = 0; start_iter = dcclock(); outnum[0] = 0; if ((H.nstep % 2) == 0) { dt = 0; // if (H.mype == 0) fprintf(stdout, "Hydro computes deltat.\n"); start = cclock(); compute_deltat(&dt, H, &Hw_deltat, &Hv, &Hvw_deltat); end = cclock(); functim[TIM_COMPDT] += ccelaps(start, end); if (H.nstep == 0) { dt = dt / 2.0; } #ifdef MPI if (H.nproc > 1) { real_t dtmin; // printf("pe=%4d\tdt=%lg\n",H.mype, dt); if (sizeof(real_t) == sizeof(double)) { MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); } else { MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD); } dt = dtmin; } #endif } // if (H.mype == 1) fprintf(stdout, "Hydro starts godunov.\n"); if ((H.nstep % 2) == 0) { hydro_godunov(1, dt, H, &Hv, &Hw_godunov, &Hvw_godunov); // hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw); } else { hydro_godunov(2, dt, H, &Hv, &Hw_godunov, &Hvw_godunov); // hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw); } end_iter = dcclock(); H.nstep++; H.t += dt; { real_t iter_time = (real_t) (end_iter - start_iter); #ifdef MPI long flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t; start = cclock(); MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); // if (H.mype == 1) // printf("%ld %ld %ld %ld %ld %ld %ld %ld \n", flopsAri, flopsSqr, flopsMin, flopsTra, flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t); flops = flopsAri_t * FLOPSARI + flopsSqr_t * FLOPSSQR + flopsMin_t * FLOPSMIN + flopsTra_t * FLOPSTRA; end = cclock(); functim[TIM_ALLRED] += ccelaps(start, end); #else flops = flopsAri * FLOPSARI + flopsSqr * FLOPSSQR + flopsMin * FLOPSMIN + flopsTra * FLOPSTRA; #endif nbFLOPS++; if (flops > 0) { if (iter_time > 1.e-9) { double mflops = (double) flops / (double) 1.e+6 / iter_time; MflopsSUM += mflops; sprintf(outnum, "%s {%.2f Mflops %ld Ops} (%.3fs)", outnum, mflops, flops, iter_time); } } else { sprintf(outnum, "%s (%.3fs)", outnum, iter_time); } } if (time_output == 0 && H.noutput > 0) { if ((H.nstep % H.noutput) == 0) { vtkfile(++nvtk, H, &Hv); sprintf(outnum, "%s [%04d]", outnum, nvtk); } } else { if (time_output == 1 && H.t >= next_output_time) { vtkfile(++nvtk, H, &Hv); next_output_time = next_output_time + H.dtoutput; sprintf(outnum, "%s [%04d]", outnum, nvtk); } } if (H.mype == 0) { fprintf(stdout, "--> step=%4d, %12.5e, %10.5e %s\n", H.nstep, H.t, dt, outnum); fflush(stdout); } } // while end_time = dcclock(); // Deallocate work spaces deallocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov); compute_deltat_clean_mem(H, &Hw_deltat, &Hvw_deltat); hydro_finish(H, &Hv); elaps = (double) (end_time - start_time); timeToString(outnum, elaps); if (H.mype == 0) { fprintf(stdout, "Hydro ends in %ss (%.3lf) <%.2lf MFlops>.\n", outnum, elaps, (float) (MflopsSUM / nbFLOPS)); fprintf(stdout, " "); } if (H.nproc == 1) { int sizeFmt = sizeLabel(functim, TIM_END); printTimingsLabel(TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "PE0 "); printTimings(functim, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "%% "); percentTimings(functim, TIM_END); printTimings(functim, TIM_END, sizeFmt); fprintf(stdout, "\n"); } #ifdef MPI if (H.nproc > 1) { double timMAX[TIM_END]; double timMIN[TIM_END]; double timSUM[TIM_END]; MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); if (H.mype == 0) { int sizeFmt = sizeLabel(timMAX, TIM_END); printTimingsLabel(TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "MIN "); printTimings(timMIN, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "MAX "); printTimings(timMAX, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "AVG "); avgTimings(timSUM, TIM_END, H.nproc); printTimings(timSUM, TIM_END, sizeFmt); fprintf(stdout, "\n"); } } #endif #ifdef MPI MPI_Finalize(); #endif return 0; }