int main(int argc, char** argv) { MPI_Init(&argc, &argv); GridOptions go = parse_grid_options(argc, argv); OTJ_Timer tm = OTJ_Timer_Start("Initial Conditions"); OTJ_Grid initial_conditions = generate_initial_conditions(go.len_x, go.len_y); OTJ_Timer_Stop(tm); tm = OTJ_Timer_Start("Solve Problem"); Stepsize h = stepsize_from_grid_options(go); OTJ_Grid current_grid = createDeviceGrid(initial_conditions); OTJ_Grid previous_grid = createAndCopyDeviceGrid(initial_conditions); OTJ_Grid_Swap(¤t_grid, &previous_grid); int tau; for(tau = 1; tau < go.len_t; tau++) { OTJ_Grid_Swap(¤t_grid, &previous_grid); apply_boundary_conditions(current_grid); solve_interior(current_grid, previous_grid,h); } OTJ_Grid solution_grid = OTJ_Grid_Alloc(go.len_x, go.len_y); retrieveDeviceGrid(solution_grid, current_grid); OTJ_Timer_Stop(tm); tm = OTJ_Timer_Start("Compare to analytic Solution"); OTJ_Grid analytic_grid = OTJ_Grid_Alloc(go.len_x, go.len_y); populate_analytic_solution(analytic_grid,h,tau); // Overwrite analytic soln with error OTJ_Calculate_Error(analytic_grid, analytic_grid, solution_grid); double gte = OTJ_Global_Error(analytic_grid); double tote = OTJ_Total_Error(analytic_grid); OTJ_Timer_Stop(tm); printf("Global Truncation Error: %f\n",gte); printf("Total Error: %f\n",tote); tm = OTJ_Timer_Start("Store Grid"); OTJ_Grid_Store(solution_grid); OTJ_Timer_Stop(tm); MPI_Finalize(); return 0; }
bool Sol_MultigridPressure3DBase::do_vcycle(double tolerance, int max_iter, double &result_l2, double &result_linf) { clear_error(); int level; int coarse_level = _num_levels-1; apply_boundary_conditions(0); double orig_l2=0, orig_linf=0; restrict_residuals(0,0, (convergence & CONVERGENCE_CALC_L2) ? &orig_l2 : 0, (convergence & CONVERGENCE_CALC_LINF) ? &orig_linf : 0); //printf("Error Before: l2 = %f, linf = %f\n", orig_l2, orig_linf); double orig_error = (convergence & CONVERGENCE_CRITERIA_L2) ? orig_l2 : (convergence & CONVERGENCE_CRITERIA_LINF) ? orig_linf : 1e20; if (orig_error < tolerance) { if (convergence & CONVERGENCE_CALC_L2) result_l2 = orig_l2; if (convergence & CONVERGENCE_CALC_LINF) result_linf = orig_linf; return true; } for (int i_cyc = 0; i_cyc < max_iter; i_cyc++) { // going down for (level = 0; level < coarse_level; level++) { relax(level, nu1, RO_RED_BLACK); clear_zero(level+1); apply_boundary_conditions(level+1); if (level == 0) { restrict_residuals(0, 1, (convergence & CONVERGENCE_CALC_L2) ? &result_l2 : 0, (convergence & CONVERGENCE_CALC_LINF) ? &result_linf : 0); double residual = (convergence & CONVERGENCE_CRITERIA_L2) ? result_l2 : (convergence & CONVERGENCE_CRITERIA_LINF) ? result_linf : 1e20; //printf("%d: residual = %.12f\n", i_cyc, result_linf); // if we're below tolerance, or we're no longer converging, bail out if (residual < tolerance) { // last time through, we need to apply boundary condition to u[0], since we just relaxed (above), but haven't propagated changes to ghost cells. // in the case we are not finished, by the time we get back to level 0 (via coarsening & then prolongation), ghost cells would be filled in. // but since we are bailing here, we need to explicitly make ghost cells up-to-date with u. apply_boundary_conditions(0); //printf("[INFO] Sol_MultigridPressure3DBase::do_vcycle - error after %d iterations: L2 = %f (%fx), Linf = %f (%fx)\n", i_cyc, result_l2, orig_l2 / result_l2, result_linf, orig_linf / result_linf); return !any_error(); } } else restrict_residuals(level, level+1, 0, 0); } // these relaxation steps are essentially free, so do lots of them // (reference implementation uses nu1+nu2) - this is probably overkill, i need to revisit this // with a good heuristic. Inhomogeneous conditions require more iterations. int coarse_iters = max3(nx(coarse_level)*ny(coarse_level), ny(coarse_level)*nz(coarse_level), nx(coarse_level)*nz(coarse_level))/2; relax(coarse_level, coarse_iters, make_symmetric_operator ? RO_SYMMETRIC : RO_RED_BLACK); //relax(coarse_level, (nx(coarse_level)*ny(coarse_level)*nz(coarse_level))/2); // going up for (level = coarse_level-1; level >= 0; level--) { prolong(level+1, level); // don't need to relax finest grid since it will get relaxed at the beginning of the next v-cycle if (level > 0) relax(level, nu2, make_symmetric_operator ? RO_BLACK_RED : RO_RED_BLACK); } } if (!(convergence & CONVERGENCE_CRITERIA_NONE)) printf("[WARNING] Sol_MultigridPressure3DBase::do_vcycle - Failed to converge, error after: L2 = %f (%fx), Linf = %f (%fx)\n", result_l2, orig_l2 / result_l2, result_linf, orig_linf / result_linf); return false; }
void main(int argc, char *argv[]) { MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&numtasks); MPI_Comm_rank(MPI_COMM_WORLD,&taskid); numworkers = numtasks-1; if (taskid == MASTER) { comp = (double *)malloc(MESHX*sizeof(double)); initialize(); averow = MESHX/numworkers; extra = MESHX%numworkers; offset = 0; for (rank=1; rank <= (numworkers); rank++) { rows = (rank <= extra) ? averow+1 : averow; left_node = rank - 1; right_node = rank + 1; if(rank == 1) { left_node = NONE; } if(rank == (numworkers)) { right_node = NONE; } dest = rank; MPI_Send(&offset, 1, MPI_INT, dest, BEGIN, MPI_COMM_WORLD); MPI_Send(&rows, 1, MPI_INT, dest, BEGIN, MPI_COMM_WORLD); MPI_Send(&left_node, 1, MPI_INT, dest, BEGIN, MPI_COMM_WORLD); MPI_Send(&right_node, 1, MPI_INT, dest, BEGIN, MPI_COMM_WORLD); MPI_Send(&comp[offset], rows, MPI_DOUBLE, dest, BEGIN, MPI_COMM_WORLD); offset = offset + rows; } for (t=1; t < ntimesteps; t++) { if (t%saveT == 0) { receivefrmworker(); writetofile(t); } } free(comp); } if(taskid != MASTER) { source = MASTER; msgtype = BEGIN; MPI_Recv(&offset, 1, MPI_INT, source, msgtype, MPI_COMM_WORLD, &status); MPI_Recv(&rows, 1, MPI_INT, source, msgtype, MPI_COMM_WORLD, &status); MPI_Recv(&left_node, 1, MPI_INT, source, msgtype, MPI_COMM_WORLD, &status); MPI_Recv(&right_node, 1, MPI_INT, source, msgtype, MPI_COMM_WORLD, &status); start = 1; if ((taskid == 1) || (taskid == numworkers)) { comp = (double *)malloc((rows+1)*sizeof(double)); if (taskid == 1) { MPI_Recv(&comp[0], rows, MPI_DOUBLE, source, msgtype, MPI_COMM_WORLD, &status); } else { MPI_Recv(&comp[1], rows, MPI_DOUBLE, source, msgtype, MPI_COMM_WORLD, &status); } end = rows-1; } else { comp = (double *)malloc((rows+2)*sizeof(double)); MPI_Recv(&comp[1], rows, MPI_DOUBLE, source, msgtype, MPI_COMM_WORLD, &status); end = rows; } long t; for (t=1; t < ntimesteps; t++) { mpiexchange(taskid); solverloop(comp); apply_boundary_conditions(taskid); if (t%saveT == 0) { sendtomaster(taskid); } } free(comp); } // printf("Hello Computer, total no of processes = %d, my name is=%d\n",numtasks, taskid); MPI_Finalize(); }
bool Sol_MultigridPressure3DBase::do_fmg(double tolerance, int max_iter, double &result_l2, double &result_linf) { CPUTimer timer; timer.start(); clear_error(); int level_ncyc; int level; result_l2 = result_linf = 0; apply_boundary_conditions(0); double orig_l2=0, orig_linf=0; restrict_residuals(0,0, (convergence & CONVERGENCE_CALC_L2) ? &orig_l2 : 0, (convergence & CONVERGENCE_CALC_LINF) ? &orig_linf : 0); //printf("Error Before: l2 = %f, linf = %f\n", orig_l2, orig_linf); double orig_error = (convergence & CONVERGENCE_CRITERIA_L2) ? orig_l2 : (convergence & CONVERGENCE_CRITERIA_LINF) ? orig_linf : 1e20; if (orig_error < tolerance) { if (convergence & CONVERGENCE_CALC_L2) result_l2 = orig_l2; if (convergence & CONVERGENCE_CALC_LINF) result_linf = orig_linf; return true; } #if 0 // for testing relaxation only, enable this code block double iter_l2, iter_linf; for (int o=0; o < 100; o++) { relax(0, 10, RO_SYMMETRIC); restrict_residuals(0, 0, &iter_l2, &iter_linf); printf("error: l2 = %.12f, linf = %.12f\n", iter_l2, iter_linf); } printf("reduction: l2 = %f, linf = %f\n", orig_l2/iter_l2, orig_linf/iter_linf); result_l2 = iter_l2; result_linf = iter_linf; return true; #endif // initialize all the residuals. // we need this because in the FMG loop below, we don't necessarily start at level 0, but // rather 2 levels from the finest. Which means we first need to propagate the errors all the way down first before // beginning FMG. int coarse_level = _num_levels-1; int num_vcyc = 0; for (level = 0; level < _num_levels-1; level++) { // initialize U (solution) at next level to zero clear_zero(level+1); apply_boundary_conditions(level+1); // restrict residuals to the next level. restrict_residuals(level, level+1,0,0); } // do the full-multigrid loop for (int fine_level = _num_levels-1; fine_level >= 0 ; fine_level--) { //{ int fine_level = 0; // do a single v-cycle instead // we always do one extra v-cycle level_ncyc = (fine_level == 0) ? max_iter+1 : 1; // do ncyc v-cycle's for (int i_cyc = 0; i_cyc < level_ncyc; i_cyc++) { if (fine_level == 0) num_vcyc++; // going down for (level = fine_level; level < coarse_level; level++) { relax(level, nu1, RO_RED_BLACK); clear_zero(level+1); apply_boundary_conditions(level+1); if (level == 0) { restrict_residuals(0, 1, (convergence & CONVERGENCE_CALC_L2) ? &result_l2 : 0, (convergence & CONVERGENCE_CALC_LINF) ? &result_linf : 0); double residual = (convergence & CONVERGENCE_CRITERIA_L2) ? result_l2 : (convergence & CONVERGENCE_CRITERIA_LINF) ? result_linf : 1e20; if (ThreadManager::this_image() == 0) printf("%d: residual = %.12f,%.12f\n", i_cyc, result_linf, result_l2); // if we're below tolerance, or we're no longer converging, bail out if (residual < tolerance) { // last time through, we need to apply boundary condition to u[0], since we just relaxed (above), but haven't propagated changes to ghost cells. // in the case we are not finished, by the time we get back to level 0 (via coarsening & then prolongation), ghost cells would be filled in. // but since we are bailing here, we need to explicitly make ghost cells up-to-date with u. apply_boundary_conditions(0); timer.stop(); //printf("[ELAPSED] Sol_MultigridPressure3DBase::do_fmg - converged in %fms\n", timer.elapsed_ms()); printf("[INFO] Sol_MultigridPressure3DBase::do_fmg - error after %d iterations: L2 = %f (%fx), Linf = %f (%fx)\n", i_cyc, result_l2, orig_l2 / result_l2, result_linf, orig_linf / result_linf); global_counter_add("vcycles", num_vcyc); return !any_error(); } } else restrict_residuals(level, level+1, 0, 0); } // these relaxation steps are essentially free, so do lots of them // (reference implementation uses nu1+nu2) - this is probably overkill, i need to revisit this // with a good heuristic. Inhomogeneous conditions require more iterations. int coarse_iters = max3(nx(coarse_level)*ny(coarse_level), ny(coarse_level)*nz(coarse_level), nx(coarse_level)*nz(coarse_level))/2; relax(coarse_level, coarse_iters, make_symmetric_operator ? RO_SYMMETRIC : RO_RED_BLACK); //relax(coarse_level, (nx(coarse_level)*ny(coarse_level)*nz(coarse_level))/2); // going up for (level = coarse_level-1; level >= fine_level; level--) { prolong(level+1, level); // don't need to relax finest grid since it will get relaxed at the beginning of the next v-cycle if (level > 0) relax(level, nu2, make_symmetric_operator ? RO_BLACK_RED : RO_RED_BLACK); } } if (fine_level > 0) { // if not at finest level, need to prolong once more to next finer level for the next fine_level value prolong(fine_level, fine_level-1); } } timer.stop(); //printf("[ELAPSED] Sol_MultigridPressure3DBase::do_fmg - stopped iterations after %fms\n", timer.elapsed_ms()); if (!(convergence & CONVERGENCE_CRITERIA_NONE)) printf("[WARNING] Sol_MultigridPressure3DBase::do_fmg - Failed to converge, error after: L2 = %.12f (%fx), Linf = %.12f (%fx)\n", result_l2, orig_l2 / result_l2, result_linf, orig_linf / result_linf); return false; }