void Timing::stop(std::string const &name){
  // get or create timer
  Timer &timer = timers[name];

#ifdef KRIPKE_USE_BGPM
  HPM_Stop(name.c_str());
#endif

#ifdef RAJA_USE_CALIPER
  cali::Annotation(name.c_str()).end();
#endif

  if(timer.started){
#ifdef KRIPKE_USE_PAPI
    int num_papi = papi_event.size();
    if(num_papi > 0){
      // read timers
      long long tmp[16];
      //PAPI_stop_counters(tmp, num_papi);
      PAPI_read(papi_set, tmp);

      // accumulate to all started timers (since this clears the PAPI values)
      for(int i = 0;i < num_papi;++ i){
        timer.papi_total[i] += tmp[i] - timer.papi_start_values[i];
      }
    }
#endif

    // Stop the timer
    timer.started = false;
    timer.total_time += getTime() - timer.start_time;
    timer.count ++;
  }
}
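//------------------------------------------------------------------------------------------------------------------------------
// [Example sketch, not part of Kripke] The snapshot-and-accumulate pattern that Timing::stop() applies per named timer,
// reduced to a standalone PAPI program: read the counters at "start", read them again at "stop", and accumulate the
// difference. The chosen preset events and the synthetic loop are illustrative only, and error checking is omitted.
#include <papi.h>
#include <stdio.h>

int main(void){
  /* Initialize PAPI and build an event set; PAPI_TOT_CYC / PAPI_TOT_INS are
     common preset events, but availability varies by platform. */
  if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) return 1;
  int papi_set = PAPI_NULL;
  PAPI_create_eventset(&papi_set);
  PAPI_add_event(papi_set, PAPI_TOT_CYC);
  PAPI_add_event(papi_set, PAPI_TOT_INS);

  long long start_values[2], now[2], total[2] = {0, 0};
  PAPI_start(papi_set);
  PAPI_read(papi_set, start_values);                      /* snapshot at "timer start" */

  volatile double x = 0.0;
  long i;
  for (i = 0; i < 10000000; i++) x += 1e-9 * (double)i;   /* region being timed */

  PAPI_read(papi_set, now);                               /* snapshot at "timer stop" */
  for (i = 0; i < 2; i++) total[i] += now[i] - start_values[i];

  printf("cycles=%lld instructions=%lld (x=%f)\n", total[0], total[1], (double)x);
  PAPI_stop(papi_set, now);
  return 0;
}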
//------------------------------------------------------------------------------------------------------------------------------
void bench_hpgmg(mg_type *all_grids, int onLevel, double a, double b, double dtol, double rtol){
  int    doTiming;
  int    minSolves = 10; // do at least minSolves MGSolves
  double timePerSolve = 0;

  for(doTiming=0;doTiming<=1;doTiming++){ // first pass warms up, second pass times

    #ifdef USE_HPM // IBM performance counters for BGQ...
    if( (doTiming==1) && (onLevel==0) )HPM_Start("FMGSolve()");
    #endif

    #ifdef USE_MPI
    double minTime   = 60.0; // minimum time in seconds that the benchmark should run
    double startTime = MPI_Wtime();
    if(doTiming==1){
      if((minTime/timePerSolve)>minSolves)minSolves=(minTime/timePerSolve); // if one needs to do more than minSolves to run for minTime, change minSolves
    }
    #endif

    if(all_grids->levels[onLevel]->my_rank==0){
      if(doTiming==0){fprintf(stdout,"\n\n===== Warming up by running %d solves ==========================================\n",minSolves);}
                 else{fprintf(stdout,"\n\n===== Running %d solves ========================================================\n",minSolves);}
      fflush(stdout);
    }

    int numSolves = 0; // solves completed
    MGResetTimers(all_grids);
    while( (numSolves<minSolves) ){
      zero_vector(all_grids->levels[onLevel],VECTOR_U);
      #ifdef USE_FCYCLES
      FMGSolve(all_grids,onLevel,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
      #else
       MGSolve(all_grids,onLevel,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
      #endif
      numSolves++;
    }

    #ifdef USE_MPI
    if(doTiming==0){
      double endTime = MPI_Wtime();
      timePerSolve = (endTime-startTime)/numSolves;
      MPI_Bcast(&timePerSolve,1,MPI_DOUBLE,0,MPI_COMM_WORLD); // after warmup, process 0 broadcasts the average time per solve (consensus)
    }
    #endif

    #ifdef USE_HPM // IBM performance counters for BGQ...
    if( (doTiming==1) && (onLevel==0) )HPM_Stop("FMGSolve()");
    #endif
  }
}
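//------------------------------------------------------------------------------------------------------------------------------
// [Example sketch, not part of HPGMG] The two-pass benchmarking pattern used by bench_hpgmg(), isolated from the solver:
// pass 0 warms up and measures the time per solve, rank 0 broadcasts that estimate so every rank computes the same
// minSolves, and pass 1 runs long enough to cover minTime. fake_solve() and the time constants are placeholders.
#include <mpi.h>
#include <stdio.h>

static void fake_solve(void){ volatile double s=0; long i; for(i=0;i<5000000;i++)s+=1e-9*(double)i; }

int main(int argc, char **argv){
  MPI_Init(&argc, &argv);
  int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  int    minSolves    = 10;   /* always do at least this many solves           */
  double minTime      = 2.0;  /* ...and keep the timed pass running this long  */
  double timePerSolve = 0;

  int doTiming;
  for(doTiming=0;doTiming<=1;doTiming++){ /* pass 0 warms up, pass 1 times */
    if( (doTiming==1) && ((minTime/timePerSolve)>minSolves) )minSolves=(int)(minTime/timePerSolve);
    double startTime = MPI_Wtime();
    int numSolves = 0;
    while(numSolves<minSolves){ fake_solve(); numSolves++; }
    if(doTiming==0){
      timePerSolve = (MPI_Wtime()-startTime)/numSolves;
      /* rank 0's estimate becomes the consensus so every rank derives the same minSolves */
      MPI_Bcast(&timePerSolve,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
    }else if(rank==0){
      printf("timed pass: %d solves, %.3f s/solve\n",numSolves,(MPI_Wtime()-startTime)/numSolves);
    }
  }
  MPI_Finalize();
  return 0;
}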
static PetscErrorCode SampleOnGrid(MPI_Comm comm,Op op,const PetscInt M[3],const PetscInt smooth[2],PetscInt nrepeat,PetscLogDouble mintime,PetscLogDouble *memused,PetscLogDouble *memavail,PetscBool monitor) {
  PetscErrorCode ierr;
  PetscInt pgrid[3],cmax,fedegree,dof,addquadpts,nlevels,M_max,solve_type=0;
  PetscMPIInt nranks;
  Grid grid;
  DM dm;
  Vec U,V=NULL,F;
  Mat A=NULL;
  KSP ksp=NULL;
  MG mg=NULL;
  const char *solve_types[2] = {"fmg","ksp"};
  PetscReal L[3];
  PetscBool affine,ksp_only = PETSC_FALSE;
#ifdef USE_HPM
  char eventname[256];
#endif

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,NULL,"KSP or FMG solver option",NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEList("-solve_type","Solve with KSP or FMG","",solve_types,2,solve_types[0],&solve_type,NULL);CHKERRQ(ierr);
  if (solve_type) {ksp_only = PETSC_TRUE;}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = OpGetFEDegree(op,&fedegree);CHKERRQ(ierr);
  ierr = OpGetDof(op,&dof);CHKERRQ(ierr);
  ierr = OpGetAddQuadPts(op,&addquadpts);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&nranks);CHKERRQ(ierr);
  ierr = ProcessGridFindSquarest(nranks,pgrid);CHKERRQ(ierr);

  // It might make sense to use a different coarsening criterion (perhaps even one specified by the sampler).  On
  // large numbers of processes, the coarse grids should be square enough that 192 is a good threshold size.
  cmax = 192;

  ierr = GridCreate(comm,M,pgrid,cmax,&grid);CHKERRQ(ierr);
  ierr = GridGetNumLevels(grid,&nlevels);CHKERRQ(ierr);

  ierr = DMCreateFE(grid,fedegree,dof,addquadpts,&dm);CHKERRQ(ierr);
  M_max = PetscMax(M[0],PetscMax(M[1],M[2]));
  L[0] = M[0]*1./M_max;
  L[1] = M[1]*1./M_max;
  L[2] = M[2]*1./M_max;
  ierr = DMFESetUniformCoordinates(dm,L);CHKERRQ(ierr);
  ierr = OpGetAffineOnly(op,&affine);CHKERRQ(ierr);
  if (!affine) {ierr = DMCoordDistort(dm,L);CHKERRQ(ierr);}

  ierr = DMCreateGlobalVector(dm,&U);CHKERRQ(ierr);
  ierr = DMCreateGlobalVector(dm,&F);CHKERRQ(ierr);
  ierr = OpForcing(op,dm,F);CHKERRQ(ierr);

  if (!ksp_only) {
    ierr = MGCreate(op,dm,nlevels,&mg);CHKERRQ(ierr);
    ierr = MGMonitorSet(mg,monitor);CHKERRQ(ierr);
    ierr = MGSetUpPC(mg);CHKERRQ(ierr);
  } else {
    ierr = DMCreateGlobalVector(dm,&V);CHKERRQ(ierr);
    ierr = OpGetMat(op,dm,&A);CHKERRQ(ierr);
    ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
    ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
    ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
  }

#ifdef USE_HPM
  ierr = PetscSNPrintf(eventname,sizeof eventname,"Solve G[%D %D %D]",M[0],M[1],M[2]);CHKERRQ(ierr);
  HPM_Start(eventname);
#endif

  PetscInt i = 0;
  PetscLogDouble sampletime = 0;
  while ( (i<nrepeat) || (sampletime < mintime) ) {
    PetscLogDouble t0,t1,elapsed,flops,eqs;
    ierr = VecZeroEntries(U);CHKERRQ(ierr);
    ierr = MPI_Barrier(comm);CHKERRQ(ierr);
    ierr = PetscTime(&t0);CHKERRQ(ierr);
    flops = petsc_TotalFlops;
    if (!ksp_only) {
      ierr = MGFCycle(op,mg,smooth[0],smooth[1],F,U);CHKERRQ(ierr);
    } else {
      ierr = KSPSolve(ksp,F,V);CHKERRQ(ierr);
      ierr = VecAXPY(V,-1.,U);CHKERRQ(ierr);
    }
    ierr = PetscTime(&t1);CHKERRQ(ierr);
    flops = petsc_TotalFlops - flops;
    elapsed = t1 - t0;
    ierr = MPI_Allreduce(MPI_IN_PLACE,&elapsed,1,MPI_DOUBLE,MPI_MAX,comm);CHKERRQ(ierr);
    ierr = MPI_Allreduce(MPI_IN_PLACE,&flops,1,MPI_DOUBLE,MPI_SUM,comm);CHKERRQ(ierr);
    eqs = (double)(M[0]*fedegree+1)*(M[1]*fedegree+1)*(M[2]*fedegree+1)*dof;
    ierr = PetscPrintf(comm,"Q%D G[%5D%5D%5D] P[%3D%3D%3D] %10.3e s %10f GF %10f MEq/s\n",fedegree,M[0],M[1],M[2],pgrid[0],pgrid[1],pgrid[2],t1-t0,flops/elapsed*1e-9,eqs/elapsed*1e-6);CHKERRQ(ierr);
    i++;
    sampletime += elapsed;
  }

#ifdef USE_HPM
  HPM_Stop(eventname);
#endif

  if (memused) {
    ierr = MemoryGetUsage(memused,memavail);CHKERRQ(ierr);
  }
  ierr = MGDestroy(&mg);CHKERRQ(ierr);
  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = VecDestroy(&V);CHKERRQ(ierr);
  ierr = VecDestroy(&U);CHKERRQ(ierr);
  ierr = VecDestroy(&F);CHKERRQ(ierr);
  ierr = DMDestroy(&dm);CHKERRQ(ierr);
  ierr = GridDestroy(&grid);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
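//------------------------------------------------------------------------------------------------------------------------------
// [Example sketch, not part of hpgmg-fe] The per-solve reporting reductions used in SampleOnGrid(), without the PETSc
// machinery: take the MAX of the elapsed time (the slowest rank defines the solve time) and the SUM of the flops, then
// derive aggregate rates. The synthetic loop and its flop count are placeholders for MGFCycle()/KSPSolve() and the
// petsc_TotalFlops deltas.
#include <mpi.h>
#include <stdio.h>
#include <math.h>

int main(int argc, char **argv){
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  double local_flops = 0.0, x = 0.0;
  MPI_Barrier(MPI_COMM_WORLD);                    /* start everyone together */
  double t0 = MPI_Wtime();
  long i;
  for (i = 0; i < 20000000; i++) { x += sin((double)i); local_flops += 2.0; }  /* placeholder "solve" */
  double t1 = MPI_Wtime();

  double elapsed = t1 - t0, flops = local_flops;
  MPI_Allreduce(MPI_IN_PLACE, &elapsed, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); /* slowest rank defines the time */
  MPI_Allreduce(MPI_IN_PLACE, &flops,   1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); /* aggregate work across ranks   */

  if (rank == 0)
    printf("%d ranks: %10.3e s %10f GF (x=%f)\n", size, elapsed, flops/elapsed*1e-9, x);
  MPI_Finalize();
  return 0;
}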
//------------------------------------------------------------------------------------------------------------------------------
void hpgmg_setup(const int log2_box_dim, const int target_boxes_per_rank, const int OMP_Threads, const int OMP_Nested, const int requested_threading_model, const int actual_threading_model)
{
  int my_rank=0;
  int num_tasks=1;

  #ifdef USE_MPI
  MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  //if(actual_threading_model>requested_threading_model)actual_threading_model=requested_threading_model;
  if(my_rank==0){
         if(requested_threading_model == MPI_THREAD_MULTIPLE  )printf("Requested MPI_THREAD_MULTIPLE, ");
    else if(requested_threading_model == MPI_THREAD_SINGLE    )printf("Requested MPI_THREAD_SINGLE, ");
    else if(requested_threading_model == MPI_THREAD_FUNNELED  )printf("Requested MPI_THREAD_FUNNELED, ");
    else if(requested_threading_model == MPI_THREAD_SERIALIZED)printf("Requested MPI_THREAD_SERIALIZED, ");
    else                                                       printf("Requested Unknown MPI Threading Model (%d), ",requested_threading_model);
         if(actual_threading_model == MPI_THREAD_MULTIPLE  )printf("got MPI_THREAD_MULTIPLE\n");
    else if(actual_threading_model == MPI_THREAD_SINGLE    )printf("got MPI_THREAD_SINGLE\n");
    else if(actual_threading_model == MPI_THREAD_FUNNELED  )printf("got MPI_THREAD_FUNNELED\n");
    else if(actual_threading_model == MPI_THREAD_SERIALIZED)printf("got MPI_THREAD_SERIALIZED\n");
    else                                                    printf("got Unknown MPI Threading Model (%d)\n",actual_threading_model);
  }
  #endif

  if(log2_box_dim<4){
    if(my_rank==0){printf("log2_box_dim must be at least 4\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }

  if(target_boxes_per_rank<1){
    if(my_rank==0){printf("target_boxes_per_rank must be at least 1\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }

  if(my_rank==0){
    if(OMP_Nested)fprintf(stdout,"%d MPI Tasks of %d threads (OMP_NESTED=TRUE)\n\n" ,num_tasks,OMP_Threads);
             else fprintf(stdout,"%d MPI Tasks of %d threads (OMP_NESTED=FALSE)\n\n",num_tasks,OMP_Threads);
  }

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // calculate the problem size...
  #ifndef MAX_COARSE_DIM
  #define MAX_COARSE_DIM 11
  #endif
  int64_t box_dim=1<<log2_box_dim;
  int64_t target_boxes = (int64_t)target_boxes_per_rank*(int64_t)num_tasks;
  int64_t boxes_in_i = -1;
  int64_t bi;
  for(bi=1;bi<1000;bi++){ // all possible problem sizes
    int64_t total_boxes = bi*bi*bi;
    if(total_boxes<=target_boxes){
      int64_t coarse_grid_dim = box_dim*bi;
      while( (coarse_grid_dim%2) == 0){coarse_grid_dim=coarse_grid_dim/2;}
      if(coarse_grid_dim<=MAX_COARSE_DIM){
        boxes_in_i = bi;
      }
    }
  }
  if(boxes_in_i<1){
    if(my_rank==0){printf("failed to find an acceptable problem size\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // create the fine level...
  #ifdef USE_PERIODIC_BC
  int bc = BC_PERIODIC;
  #else
  int bc = BC_DIRICHLET;
  #endif
  level_type fine_grid;
  int ghosts=stencil_get_radius();
  create_level(&fine_grid,boxes_in_i,box_dim,ghosts,VECTORS_RESERVED,bc,my_rank,num_tasks);

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #ifdef USE_HELMHOLTZ
  double a=1.0;double b=1.0; // Helmholtz
  if(my_rank==0)fprintf(stdout," Creating Helmholtz (a=%f, b=%f) test problem\n",a,b);
  #else
  double a=0.0;double b=1.0; // Poisson
  if(my_rank==0)fprintf(stdout," Creating Poisson (a=%f, b=%f) test problem\n",a,b);
  #endif
  double h0=1.0/( (double)boxes_in_i*(double)box_dim );
  initialize_problem(&fine_grid,h0,a,b); // calculate VECTOR_ALPHA, VECTOR_BETA, and VECTOR_UTRUE

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if( ((a==0.0)||(dot(&fine_grid,VECTOR_ALPHA,VECTOR_ALPHA)==0.0)) && (fine_grid.boundary_condition.type == BC_PERIODIC) ){
    // Poisson w/ periodic BC's...
    // nominally, u shifted by any constant is still a valid solution.
    // However, by convention, we assume u sums to zero.
    double average_value_of_u = mean(&fine_grid,VECTOR_UTRUE);
    if(my_rank==0){fprintf(stdout," average value of u_true = %20.12e... shifting u_true to ensure it sums to zero...\n",average_value_of_u);}
    shift_vector(&fine_grid,VECTOR_UTRUE,VECTOR_UTRUE,-average_value_of_u);
  }

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  //apply_op(&fine_grid,VECTOR_F,VECTOR_UTRUE,a,b); // by construction, f = A(u_true)

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if(fine_grid.boundary_condition.type == BC_PERIODIC){
    double average_value_of_f = mean(&fine_grid,VECTOR_F);
    if(average_value_of_f!=0.0){
      if(my_rank==0){fprintf(stderr," WARNING... Periodic boundary conditions, but f does not sum to zero... mean(f)=%e\n",average_value_of_f);}
      //shift_vector(&fine_grid,VECTOR_F,VECTOR_F,-average_value_of_f);
    }
  }

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  mg_type all_grids;
  int minCoarseDim = 1;
  rebuild_operator(&fine_grid,NULL,a,b); // i.e. calculate Dinv and lambda_max
  MGBuild(&all_grids,&fine_grid,a,b,minCoarseDim); // build the Multigrid Hierarchy
  double dtol= 0.0;double rtol=1e-10;   // converged if ||b-Ax|| / ||b|| < rtol
  //double dtol=1e-15;double rtol= 0.0; // converged if ||D^{-1}(b-Ax)|| < dtol

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  int doTiming;
  int minSolves = 10; // do at least minSolves MGSolves
  double timePerSolve = 0;
  for(doTiming=0;doTiming<=1;doTiming++){ // first pass warms up, second pass times

    #ifdef USE_HPM // IBM performance counters for BGQ...
    if(doTiming)HPM_Start("FMGSolve()");
    #endif

    #ifdef USE_MPI
    double minTime   = 30.0; // minimum time in seconds that the benchmark should run
    double startTime = MPI_Wtime();
    if(doTiming==1){
      if((minTime/timePerSolve)>minSolves)minSolves=(minTime/timePerSolve); // if one needs to do more than minSolves to run for minTime, change minSolves
    }
    #endif

    if(my_rank==0){
      if(doTiming==0){fprintf(stdout,"\n\n===== warming up by running %d solves ===============================\n",minSolves);}
                 else{fprintf(stdout,"\n\n===== running %d solves =============================================\n",minSolves);}
      fflush(stdout);
    }

    int numSolves = 0; // solves completed
    MGResetTimers(&all_grids);
    while( (numSolves<minSolves) ){
      zero_vector(all_grids.levels[0],VECTOR_U);
      #ifdef USE_FCYCLES
      FMGSolve(&all_grids,0,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
      #else
       MGSolve(&all_grids,0,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
      #endif
      numSolves++;
    }

    #ifdef USE_MPI
    if(doTiming==0){
      double endTime = MPI_Wtime();
      timePerSolve = (endTime-startTime)/numSolves;
      MPI_Bcast(&timePerSolve,1,MPI_DOUBLE,0,MPI_COMM_WORLD); // after warmup, process 0 broadcasts the average time per solve (consensus)
    }
    #endif

    #ifdef USE_HPM // IBM performance counters for BGQ...
    if(doTiming)HPM_Stop("FMGSolve()");
    #endif
  }
  MGPrintTiming(&all_grids); // don't include the error check in the timing results

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if(my_rank==0){fprintf(stdout,"calculating error... ");}
  double fine_error = error(&fine_grid,VECTOR_U,VECTOR_UTRUE);
  if(my_rank==0){fprintf(stdout,"h = %22.15e ||error|| = %22.15e\n\n",h0,fine_error);fflush(stdout);}

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // MGDestroy()

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #ifdef USE_MPI
  #ifdef USE_HPM // IBM performance counters for BGQ...
  HPM_Print();
  #endif
  MPI_Finalize();
  #endif

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  return;
}
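//------------------------------------------------------------------------------------------------------------------------------
// [Example sketch, not part of HPGMG] The problem-size search that appears in hpgmg_setup() above (and again in main()
// below), isolated with hypothetical inputs: pick the largest boxes_in_i whose cube fits within target_boxes while the
// global dimension, with all factors of two removed, still coarsens down to MAX_COARSE_DIM or less.
#include <stdio.h>
#include <stdint.h>

#ifndef MAX_COARSE_DIM
#define MAX_COARSE_DIM 11
#endif

/* Largest boxes_in_i such that boxes_in_i^3 <= target_boxes and the odd part of
   box_dim*boxes_in_i does not exceed MAX_COARSE_DIM (so the hierarchy can coarsen all the way down). */
static int64_t pick_boxes_in_i(int64_t box_dim, int64_t target_boxes){
  int64_t boxes_in_i = -1, bi;
  for(bi=1;bi<1000;bi++){ // all possible problem sizes
    if(bi*bi*bi<=target_boxes){
      int64_t coarse_grid_dim = box_dim*bi;
      while((coarse_grid_dim%2)==0)coarse_grid_dim/=2;
      if(coarse_grid_dim<=MAX_COARSE_DIM)boxes_in_i = bi;
    }
  }
  return boxes_in_i;
}

int main(void){
  int log2_box_dim = 6, target_boxes_per_rank = 8, num_tasks = 64; /* example inputs, not benchmark defaults */
  int64_t box_dim      = (int64_t)1<<log2_box_dim;
  int64_t target_boxes = (int64_t)target_boxes_per_rank*(int64_t)num_tasks;
  int64_t boxes_in_i   = pick_boxes_in_i(box_dim, target_boxes);
  if(boxes_in_i<1){printf("failed to find an acceptable problem size\n");return 1;}
  printf("box_dim=%lld boxes_in_i=%lld -> global grid %lld^3 (%lld boxes of %lld^3)\n",
         (long long)box_dim,(long long)boxes_in_i,
         (long long)(box_dim*boxes_in_i),(long long)(boxes_in_i*boxes_in_i*boxes_in_i),(long long)box_dim);
  return 0;
}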
//------------------------------------------------------------------------------------------------------------------------------
int main(int argc, char **argv){
  int my_rank=0;
  int num_tasks=1;
  int OMP_Threads = 1;
  int OMP_Nested  = 0;

  #ifdef _OPENMP
  #pragma omp parallel
  {
    #pragma omp master
    {
      OMP_Threads = omp_get_num_threads();
      OMP_Nested  = omp_get_nested();
    }
  }
  #endif

  #ifdef USE_MPI
  int actual_threading_model = -1;
  int requested_threading_model = -1;
  requested_threading_model = MPI_THREAD_SINGLE;
  //requested_threading_model = MPI_THREAD_FUNNELED;
  //requested_threading_model = MPI_THREAD_SERIALIZED;
  //requested_threading_model = MPI_THREAD_MULTIPLE;
  //MPI_Init(&argc, &argv);
  #ifdef _OPENMP
  requested_threading_model = MPI_THREAD_FUNNELED;
  //requested_threading_model = MPI_THREAD_SERIALIZED;
  //requested_threading_model = MPI_THREAD_MULTIPLE;
  //MPI_Init_thread(&argc, &argv, requested_threading_model, &actual_threading_model);
  #endif
  MPI_Init_thread(&argc, &argv, requested_threading_model, &actual_threading_model);
  MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  //if(actual_threading_model>requested_threading_model)actual_threading_model=requested_threading_model;
  if(my_rank==0){
         if(requested_threading_model == MPI_THREAD_MULTIPLE  )printf("Requested MPI_THREAD_MULTIPLE, ");
    else if(requested_threading_model == MPI_THREAD_SINGLE    )printf("Requested MPI_THREAD_SINGLE, ");
    else if(requested_threading_model == MPI_THREAD_FUNNELED  )printf("Requested MPI_THREAD_FUNNELED, ");
    else if(requested_threading_model == MPI_THREAD_SERIALIZED)printf("Requested MPI_THREAD_SERIALIZED, ");
    else                                                       printf("Requested Unknown MPI Threading Model (%d), ",requested_threading_model);
         if(actual_threading_model == MPI_THREAD_MULTIPLE  )printf("got MPI_THREAD_MULTIPLE\n");
    else if(actual_threading_model == MPI_THREAD_SINGLE    )printf("got MPI_THREAD_SINGLE\n");
    else if(actual_threading_model == MPI_THREAD_FUNNELED  )printf("got MPI_THREAD_FUNNELED\n");
    else if(actual_threading_model == MPI_THREAD_SERIALIZED)printf("got MPI_THREAD_SERIALIZED\n");
    else                                                    printf("got Unknown MPI Threading Model (%d)\n",actual_threading_model);
  }
  #ifdef USE_HPM // IBM HPM counters for BGQ...
  HPM_Init();
  #endif
  #endif // USE_MPI

  int log2_box_dim          = 6;
  int target_boxes_per_rank = 1;

  if(argc==3){
    log2_box_dim          = atoi(argv[1]);
    target_boxes_per_rank = atoi(argv[2]);
  }else{
    if(my_rank==0){printf("usage: ./a.out [log2_box_dim] [target_boxes_per_rank]\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }

  if(log2_box_dim<4){
    if(my_rank==0){printf("log2_box_dim must be at least 4\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }

  if(target_boxes_per_rank<1){
    if(my_rank==0){printf("target_boxes_per_rank must be at least 1\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }

  if(my_rank==0){
    if(OMP_Nested)fprintf(stdout,"%d MPI Tasks of %d threads (OMP_NESTED=TRUE)\n\n" ,num_tasks,OMP_Threads);
             else fprintf(stdout,"%d MPI Tasks of %d threads (OMP_NESTED=FALSE)\n\n",num_tasks,OMP_Threads);
  }

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // calculate the problem size...
  #ifndef MAX_COARSE_DIM
  #define MAX_COARSE_DIM 11
  #endif
  int64_t box_dim=1<<log2_box_dim;
  int64_t target_boxes = (int64_t)target_boxes_per_rank*(int64_t)num_tasks;
  int64_t boxes_in_i = -1;
  int64_t bi;
  for(bi=1;bi<1000;bi++){ // all possible problem sizes
    int64_t total_boxes = bi*bi*bi;
    if(total_boxes<=target_boxes){
      int64_t coarse_grid_dim = box_dim*bi;
      while( (coarse_grid_dim%2) == 0){coarse_grid_dim=coarse_grid_dim/2;}
      if(coarse_grid_dim<=MAX_COARSE_DIM){
        boxes_in_i = bi;
      }
    }
  }
  if(boxes_in_i<1){
    if(my_rank==0){printf("failed to find an acceptable problem size\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // create the fine level...
  #ifdef USE_PERIODIC_BC
  int bc = BC_PERIODIC;
  #else
  int bc = BC_DIRICHLET;
  #endif
  level_type fine_grid;
  int ghosts=stencil_get_radius();
  create_level(&fine_grid,boxes_in_i,box_dim,ghosts,VECTORS_RESERVED,bc,my_rank,num_tasks);
  //create_level(&fine_grid,boxes_in_i,box_dim,ghosts,VECTORS_RESERVED,BC_PERIODIC ,my_rank,num_tasks);double h0=1.0/( (double)boxes_in_i*(double)box_dim );double a=2.0;double b=1.0; // Helmholtz w/Periodic
  //create_level(&fine_grid,boxes_in_i,box_dim,ghosts,VECTORS_RESERVED,BC_PERIODIC ,my_rank,num_tasks);double h0=1.0/( (double)boxes_in_i*(double)box_dim );double a=0.0;double b=1.0; // Poisson w/Periodic
  //create_level(&fine_grid,boxes_in_i,box_dim,ghosts,VECTORS_RESERVED,BC_DIRICHLET,my_rank,num_tasks);double h0=1.0/( (double)boxes_in_i*(double)box_dim );double a=2.0;double b=1.0; // Helmholtz w/Dirichlet
  //create_level(&fine_grid,boxes_in_i,box_dim,ghosts,VECTORS_RESERVED,BC_DIRICHLET,my_rank,num_tasks);double h0=1.0/( (double)boxes_in_i*(double)box_dim );double a=0.0;double b=1.0; // Poisson w/Dirichlet

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #ifdef USE_HELMHOLTZ
  double a=2.0;double b=1.0; // Helmholtz
  if(my_rank==0)fprintf(stdout," Creating Helmholtz (a=%f, b=%f) test problem\n",a,b);
  #else
  double a=0.0;double b=1.0; // Poisson
  if(my_rank==0)fprintf(stdout," Creating Poisson (a=%f, b=%f) test problem\n",a,b);
  #endif
  double h0=1.0/( (double)boxes_in_i*(double)box_dim );
  initialize_problem(&fine_grid,h0,a,b);
  rebuild_operator(&fine_grid,NULL,a,b); // i.e. calculate Dinv and lambda_max

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  mg_type all_grids;
  int minCoarseDim = 1;
  MGBuild(&all_grids,&fine_grid,a,b,minCoarseDim); // build the Multigrid Hierarchy

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  int doTiming;
  int minSolves = 10; // do at least minSolves MGSolves
  double timePerSolve = 0;
  for(doTiming=0;doTiming<=1;doTiming++){ // first pass warms up, second pass times

    #ifdef USE_HPM // IBM performance counters for BGQ...
    if(doTiming)HPM_Start("FMGSolve()");
    #endif

    #ifdef USE_MPI
    double minTime   = 20.0; // minimum time in seconds that the benchmark should run
    double startTime = MPI_Wtime();
    if(doTiming==1){
      if((minTime/timePerSolve)>minSolves)minSolves=(minTime/timePerSolve); // if one needs to do more than minSolves to run for minTime, change minSolves
    }
    #endif

    if(my_rank==0){
      if(doTiming==0){fprintf(stdout,"\n\n===== warming up by running %d solves ===============================\n",minSolves);}
                 else{fprintf(stdout,"\n\n===== running %d solves =============================================\n",minSolves);}
      fflush(stdout);
    }

    int numSolves = 0; // solves completed
    MGResetTimers(&all_grids);
    while( (numSolves<minSolves) ){
      zero_vector(all_grids.levels[0],VECTOR_U);
      #ifdef USE_FCYCLES
      FMGSolve(&all_grids,VECTOR_U,VECTOR_F,a,b,1e-15);
      #else
       MGSolve(&all_grids,VECTOR_U,VECTOR_F,a,b,1e-15);
      #endif
      numSolves++;
    }

    #ifdef USE_MPI
    if(doTiming==0){
      double endTime = MPI_Wtime();
      timePerSolve = (endTime-startTime)/numSolves;
      MPI_Bcast(&timePerSolve,1,MPI_DOUBLE,0,MPI_COMM_WORLD); // after warmup, process 0 broadcasts the average time per solve (consensus)
    }
    #endif

    #ifdef USE_HPM // IBM performance counters for BGQ...
    if(doTiming)HPM_Stop("FMGSolve()");
    #endif
  }
  MGPrintTiming(&all_grids); // don't include the error check in the timing results

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if(my_rank==0){fprintf(stdout,"calculating error... ");}
  double fine_error = error(&fine_grid,VECTOR_U,VECTOR_UTRUE);
  if(my_rank==0){fprintf(stdout,"h = %22.15e ||error|| = %22.15e\n\n",h0,fine_error);fflush(stdout);}

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // MGDestroy()

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #ifdef USE_MPI
  #ifdef USE_HPM // IBM performance counters for BGQ...
  HPM_Print();
  #endif
  MPI_Finalize();
  #endif

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  return(0);
}
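//------------------------------------------------------------------------------------------------------------------------------
// [Example sketch, not part of the benchmarks] The USE_HPM pattern shared by the listings above, reduced to its skeleton:
// initialize the counters once after MPI_Init, bracket the region of interest with a named HPM_Start/HPM_Stop pair, and
// call HPM_Print before MPI_Finalize. do_work() is a placeholder; when USE_HPM is defined, the HPM_* declarations are
// assumed to come from the vendor's BG/Q HPM header and library (not shown here), so this compiles portably only
// without USE_HPM.
#include <mpi.h>
#include <math.h>
#include <stdio.h>

static double do_work(void){ double x=0; long i; for(i=0;i<5000000;i++)x+=sin((double)i); return x; }

int main(int argc, char **argv){
  MPI_Init(&argc, &argv);
  #ifdef USE_HPM // IBM performance counters for BGQ...
  HPM_Init();
  HPM_Start("do_work()");
  #endif
  double x = do_work();
  #ifdef USE_HPM
  HPM_Stop("do_work()");
  HPM_Print();
  #endif
  int rank; MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  if(rank==0)printf("x=%f\n",x);
  MPI_Finalize();
  return 0;
}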