int main(int argc, char *argv[]) { int rank, nbProcs, nbLines, i, M, arg; double wtime, *h, *g, memSize, localerror, globalerror = 1; MPI_Init(&argc, &argv); FTI_Init(argv[2], MPI_COMM_WORLD); MPI_Comm_size(FTI_COMM_WORLD, &nbProcs); MPI_Comm_rank(FTI_COMM_WORLD, &rank); arg = atoi(argv[1]); M = (int)sqrt((double)(arg * 1024.0 * 512.0 * nbProcs)/sizeof(double)); nbLines = (M / nbProcs)+3; h = (double *) malloc(sizeof(double *) * M * nbLines); g = (double *) malloc(sizeof(double *) * M * nbLines); initData(nbLines, M, rank, g); memSize = M * nbLines * 2 * sizeof(double) / (1024 * 1024); if (rank == 0) { printf("Local data size is %d x %d = %f MB (%d).\n", M, nbLines, memSize, arg); printf("Target precision : %f \n", PRECISION); printf("Maximum number of iterations : %d \n", ITER_TIMES); } FTI_Protect(0, &i, 1, FTI_INTG); FTI_Protect(1, h, M*nbLines, FTI_DBLE); FTI_Protect(2, g, M*nbLines, FTI_DBLE); wtime = MPI_Wtime(); for (i = 0; i < ITER_TIMES; i++) { int checkpointed = FTI_Snapshot(); localerror = doWork(nbProcs, rank, M, nbLines, g, h); if (((i%ITER_OUT) == 0) && (rank == 0)) { printf("Step : %d, error = %f\n", i, globalerror); } if ((i%REDUCE) == 0) { MPI_Allreduce(&localerror, &globalerror, 1, MPI_DOUBLE, MPI_MAX, FTI_COMM_WORLD); } if(globalerror < PRECISION) { break; } } if (rank == 0) { printf("Execution finished in %lf seconds.\n", MPI_Wtime() - wtime); } free(h); free(g); FTI_Finalize(); MPI_Finalize(); return 0; }
/**
 * Prepare the test state in @p info and register the bookkeeping variables
 * with FTI.
 *
 * Reads "config.fti" from the working directory, fills the file-level
 * variables grank, finalTag, numHeads, headRank and pat, selects the test mode
 * from the TEST_MODE environment variable (anything other than "ICP" selects
 * NOICP), picks or recovers the number of buffers, allocates the per-buffer
 * bookkeeping arrays and finally calls allocate_buffers(), generate_data() and
 * init_srand().
 *
 * @param info        structure to initialise; info->xor_info must already be
 *                    valid because it is handed to FTI_Protect().
 * @param alloc_size  allocation size forwarded to allocate_buffers(); values
 *                    below 101 are rejected through EXIT_CFG_ERR.
 *
 * NOTE(review): the dictionary returned by iniparser_load() is not released
 * anywhere in this function; consider freeing it once the three settings have
 * been read.
 */
void init( dcp_info_t * info, unsigned long alloc_size )
{
    MPI_Comm_rank(MPI_COMM_WORLD, &grank);

    dictionary *ini = NULL;
    if (access("config.fti", R_OK) == 0) {
        ini = iniparser_load("config.fti");
        if (ini == NULL) {
            WARN_MSG("failed to parse FTI config file!");
            exit(EXIT_FAILURE);
        }
    } else {
        EXIT_STD_ERR("cannot access FTI config file!");
    }

    finalTag = (int)iniparser_getint(ini, "Advanced:final_tag", 3107);
    numHeads = (int)iniparser_getint(ini, "Basic:head", 0);
    int nodeSize = (int)iniparser_getint(ini, "Basic:node_size", -1);

    /* A node_size of 0 would make the modulo undefined. For non-positive
     * values fall back to grank, which is what the former expression gave for
     * the default of -1. */
    headRank = (nodeSize > 0) ? grank - grank % nodeSize : grank;

    /* Only the exact value "ICP" enables ICP; unset or any other value means NOICP. */
    const char *env = getenv( "TEST_MODE" );
    if ( env != NULL && strcmp( env, "ICP" ) == 0 ) {
        info->test_mode = TEST_ICP;
        INFO_MSG("TEST MODE -> ICP");
    } else {
        info->test_mode = TEST_NOICP;
        INFO_MSG("TEST MODE -> NOICP");
    }

    init_share();

    // init pattern (drawn before the srand() call further down)
    pat = (uint32_t) rand();

    // protect pattern and xor_info
    FTI_InitType( &FTI_UI, UI_UNIT );
    FTI_Protect( PAT_ID, &pat, 1, FTI_UI );
    FTI_InitType( &FTI_XOR_INFO, sizeof(xor_info_t) );
    FTI_Protect( XOR_INFO_ID, info->xor_info, NUM_DCKPT, FTI_XOR_INFO );
    FTI_Protect( NBUFFER_ID, &info->nbuffer, 1, FTI_INTG );

    // check if alloc_size is sufficiently large
    if ( alloc_size < 101 ) {
        EXIT_CFG_ERR("insufficiant allocation size");
    }

    // determine number of buffers
    // Each rank waits 5 ms times its rank before seeding; presumably so that
    // get_seed() yields different seeds per rank -- TODO confirm.
    usleep(5000*grank);
    srand(get_seed());
    if ( FTI_Status() == 0 ) {
        info->nbuffer = rand()%10+1;
    } else {
        FTI_RecoverVar( NBUFFER_ID );
    }

    // initialize structure
    size_t count = (size_t) info->nbuffer;
    info->buffer = malloc(count * sizeof *info->buffer);
    info->size = malloc(count * sizeof *info->size);
    info->oldsize = malloc(count * sizeof *info->oldsize);
    info->hash = malloc(count * sizeof *info->hash);
    if ( info->buffer == NULL || info->size == NULL
            || info->oldsize == NULL || info->hash == NULL ) {
        EXIT_STD_ERR("cannot allocate buffer bookkeeping arrays!");
    }

    int idx;
    for ( idx=0; idx<info->nbuffer; ++idx ) {
        info->buffer[idx] = NULL;
        info->hash[idx] = malloc(MD5_DIGEST_LENGTH);
        if ( info->hash[idx] == NULL ) {
            EXIT_STD_ERR("cannot allocate hash storage!");
        }
    }

    allocate_buffers( info, alloc_size );
    generate_data( info );
    init_srand();
}
/**
 * Register every buffer held in @p info with FTI.
 *
 * Buffer number k is protected under FTI id k as info->size[k] elements of
 * FTI_CHAR, for k in [0, info->nbuffer).
 */
void protect_buffers( dcp_info_t *info )
{
    int id = 0;

    while ( id < info->nbuffer ) {
        FTI_Protect( id, info->buffer[id], info->size[id], FTI_CHAR );
        ++id;
    }
}
/*-------------------------------------------------------------------------*/ int main(int argc, char** argv) { int fail, rank, nbProcs, nbLines, i, M, arg; double wtime, *h, *g, memSize, localerror, globalerror = 1; if (init(argv, &fail)) { return 0; //verify args } MPI_Init(&argc, &argv); FTI_Init(argv[1], MPI_COMM_WORLD); MPI_Comm_size(FTI_COMM_WORLD, &nbProcs); MPI_Comm_rank(FTI_COMM_WORLD, &rank); arg = 4; M = (int)sqrt((double)(arg * 1024.0 * 512.0 * nbProcs)/sizeof(double)); nbLines = (M / nbProcs)+3; h = (double *) malloc(sizeof(double *) * M * nbLines); g = (double *) malloc(sizeof(double *) * M * nbLines); initData(nbLines, M, rank, g); memSize = M * nbLines * 2 * sizeof(double) / (1024 * 1024); if (rank == 0) { printf("Local data size is %d x %d = %f MB (%d).\n", M, nbLines, memSize, arg); printf("Target precision : %f \n", PRECISION); printf("Maximum number of iterations : %d \n", ITER_TIMES); } //adding variables to protect FTI_Protect(0, &i, 1, FTI_INTG); FTI_Protect(1, h, M*nbLines, FTI_DBLE); FTI_Protect(2, g, M*nbLines, FTI_DBLE); int iTmp = 0; wtime = MPI_Wtime(); for (i = 0; i < ITER_TIMES; i++) { iTmp = i; int checkpointed = FTI_Snapshot(); if (!(checkpointed != FTI_SCES || checkpointed != FTI_DONE)) { printf("%d: Snapshot failed! Returned %d.\n", rank, checkpointed); free(h); free(g); FTI_Finalize(); MPI_Finalize(); return 1; } else if (rank == 0 && checkpointed == FTI_DONE) { printf("Checkpoint made i = %d\n", i); } else if (rank == 0 && checkpointed == FTI_SCES && i != iTmp) { printf("Recovered! 
i = %d\n", i); } localerror = doWork(nbProcs, rank, M, nbLines, g, h); if (((i%ITER_OUT) == 0) && (rank == 0)) { printf("Step : %d, error = %f\n", i, globalerror); } if ((i%REDUCE) == 0) { MPI_Allreduce(&localerror, &globalerror, 1, MPI_DOUBLE, MPI_MAX, FTI_COMM_WORLD); } if (globalerror < PRECISION) { break; } if (fail && i >= ITER_STOP) { printf("%d: Stoped at i = %d.\n", rank, i); break; } } if (rank == 0) { printf("Execution finished in %lf seconds. Error = %f\n", MPI_Wtime() - wtime, globalerror); } int rtn = 0; //return value if (!fail) { rtn = verify(globalerror, rank); } free(h); free(g); FTI_Finalize(); MPI_Finalize(); return rtn; }
int main(int argc, char **argv) { char myhost[256]; real_t dt = 0; int nvtk = 0; char outnum[80]; int time_output = 0; long flops = 0; // real_t output_time = 0.0; real_t next_output_time = 0; double start_time = 0, end_time = 0; double start_iter = 0, end_iter = 0; double elaps = 0; struct timespec start, end; double cellPerCycle = 0; double avgCellPerCycle = 0; long nbCycle = 0; // array of timers to profile the code memset(functim, 0, TIM_END * sizeof(functim[0])); #ifdef MPI MPI_Init(&argc, &argv); #endif process_args(argc, argv, &H); hydro_init(&H, &Hv); if (H.mype == 0) fprintf(stdout, "Hydro starts in %s precision.\n", ((sizeof(real_t) == sizeof(double))? "double": "single")); gethostname(myhost, 255); if (H.mype == 0) { fprintf(stdout, "Hydro: Main process running on %s\n", myhost); } #ifdef _OPENMP if (H.mype == 0) { fprintf(stdout, "Hydro: OpenMP mode ON\n"); fprintf(stdout, "Hydro: OpenMP %d max threads\n", omp_get_max_threads()); fprintf(stdout, "Hydro: OpenMP %d num threads\n", omp_get_num_threads()); fprintf(stdout, "Hydro: OpenMP %d num procs\n", omp_get_num_procs()); } #endif #ifdef MPI if (H.mype == 0) { fprintf(stdout, "Hydro: MPI run with %d procs\n", H.nproc); } #else fprintf(stdout, "Hydro: standard build\n"); #endif // PRINTUOLD(H, &Hv); #ifdef MPI if (H.nproc > 1) #if FTI>0 MPI_Barrier(FTI_COMM_WORLD); #endif #if FTI==0 MPI_Barrier(MPI_COMM_WORLD); #endif #endif if (H.dtoutput > 0) { // outputs are in physical time not in time steps time_output = 1; next_output_time = next_output_time + H.dtoutput; } if (H.dtoutput > 0 || H.noutput > 0) vtkfile(++nvtk, H, &Hv); if (H.mype == 0) fprintf(stdout, "Hydro starts main loop.\n"); //pre-allocate memory before entering in loop //For godunov scheme start = cclock(); start = cclock(); allocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov); compute_deltat_init_mem(H, &Hw_deltat, &Hvw_deltat); end = cclock(); #ifdef MPI #if FTI==1 FTI_Protect(0,functim, TIM_END,FTI_DBLE); 
FTI_Protect(1,&nvtk,1,FTI_INTG); FTI_Protect(2,&next_output_time,1,FTI_DBLE); FTI_Protect(3,&dt,1,FTI_DBLE); FTI_Protect(4,&MflopsSUM,1,FTI_DBLE); FTI_Protect(5,&nbFLOPS,1,FTI_LONG); FTI_Protect(6,&(H.nstep),1,FTI_INTG); FTI_Protect(7,&(H.t),1,FTI_DBLE); FTI_Protect(8,Hv.uold,H.nvar * H.nxt * H.nyt,FTI_DBLE); #endif #endif if (H.mype == 0) fprintf(stdout, "Hydro: init mem %lfs\n", ccelaps(start, end)); // we start timings here to avoid the cost of initial memory allocation start_time = dcclock(); while ((H.t < H.tend) && (H.nstep < H.nstepmax)) { //system("top -b -n1"); // reset perf counter for this iteration flopsAri = flopsSqr = flopsMin = flopsTra = 0; start_iter = dcclock(); outnum[0] = 0; if ((H.nstep % 2) == 0) { dt = 0; // if (H.mype == 0) fprintf(stdout, "Hydro computes deltat.\n"); start = cclock(); compute_deltat(&dt, H, &Hw_deltat, &Hv, &Hvw_deltat); end = cclock(); functim[TIM_COMPDT] += ccelaps(start, end); if (H.nstep == 0) { dt = dt / 2.0; if (H.mype == 0) fprintf(stdout, "Hydro computes initial deltat: %le\n", dt); } #ifdef MPI if (H.nproc > 1) { real_t dtmin; // printf("pe=%4d\tdt=%lg\n",H.mype, dt); #if FTI==0 if (sizeof(real_t) == sizeof(double)) { MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); } else { MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD); } #endif #if FTI>0 if (sizeof(real_t) == sizeof(double)) { MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, FTI_COMM_WORLD); } else { MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, FTI_COMM_WORLD); } #endif dt = dtmin; } #endif } // dt = 1.e-3; // if (H.mype == 1) fprintf(stdout, "Hydro starts godunov.\n"); if ((H.nstep % 2) == 0) { hydro_godunov(1, dt, H, &Hv, &Hw_godunov, &Hvw_godunov); // hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw); } else { hydro_godunov(2, dt, H, &Hv, &Hw_godunov, &Hvw_godunov); // hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw); } end_iter = dcclock(); cellPerCycle = (double) (H.globnx * H.globny) / (end_iter - start_iter) / 1000000.0L; 
avgCellPerCycle += cellPerCycle; nbCycle++; H.nstep++; H.t += dt; { real_t iter_time = (real_t) (end_iter - start_iter); #ifdef MPI long flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t; start = cclock(); #if FTI==0 MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); #endif #if FTI>0 MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD); MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD); MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD); MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD); #endif // if (H.mype == 1) // printf("%ld %ld %ld %ld %ld %ld %ld %ld \n", flopsAri, flopsSqr, flopsMin, flopsTra, flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t); flops = flopsAri_t * FLOPSARI + flopsSqr_t * FLOPSSQR + flopsMin_t * FLOPSMIN + flopsTra_t * FLOPSTRA; end = cclock(); functim[TIM_ALLRED] += ccelaps(start, end); #else flops = flopsAri * FLOPSARI + flopsSqr * FLOPSSQR + flopsMin * FLOPSMIN + flopsTra * FLOPSTRA; #endif nbFLOPS++; if (flops > 0) { if (iter_time > 1.e-9) { double mflops = (double) flops / (double) 1.e+6 / iter_time; MflopsSUM += mflops; sprintf(outnum, "%s {%.2f Mflops %ld Ops} (%.3fs)", outnum, mflops, flops, iter_time); } } else { sprintf(outnum, "%s (%.3fs)", outnum, iter_time); } } if (time_output == 0 && H.noutput > 0) { if ((H.nstep % H.noutput) == 0) { vtkfile(++nvtk, H, &Hv); sprintf(outnum, "%s [%04d]", outnum, nvtk); } } else { if (time_output == 1 && H.t >= next_output_time) { vtkfile(++nvtk, H, &Hv); next_output_time = next_output_time + H.dtoutput; sprintf(outnum, "%s [%04d]", outnum, nvtk); } } if (H.mype == 0) { fprintf(stdout, "--> step=%4d, %12.5e, %10.5e %.3lf MC/s%s\n", H.nstep, H.t, dt, 
cellPerCycle, outnum); fflush(stdout); } #ifdef MPI #if FTI==1 FTI_Snapshot(); #endif #endif } // while end_time = dcclock(); // Deallocate work spaces deallocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov); compute_deltat_clean_mem(H, &Hw_deltat, &Hvw_deltat); hydro_finish(H, &Hv); elaps = (double) (end_time - start_time); timeToString(outnum, elaps); if (H.mype == 0) { fprintf(stdout, "Hydro ends in %ss (%.3lf) <%.2lf MFlops>.\n", outnum, elaps, (float) (MflopsSUM / nbFLOPS)); fprintf(stdout, " "); } if (H.nproc == 1) { int sizeFmt = sizeLabel(functim, TIM_END); printTimingsLabel(TIM_END, sizeFmt); fprintf(stdout, "\n"); if (sizeof(real_t) == sizeof(double)) { fprintf(stdout, "PE0_DP "); } else { fprintf(stdout, "PE0_SP "); } printTimings(functim, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "%% "); percentTimings(functim, TIM_END); printTimings(functim, TIM_END, sizeFmt); fprintf(stdout, "\n"); } #ifdef MPI if (H.nproc > 1) { double timMAX[TIM_END]; double timMIN[TIM_END]; double timSUM[TIM_END]; #if FTI==0 MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #endif #if FTI>0 MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, FTI_COMM_WORLD); MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, FTI_COMM_WORLD); MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, FTI_COMM_WORLD); #endif if (H.mype == 0) { int sizeFmt = sizeLabel(timMAX, TIM_END); printTimingsLabel(TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "MIN "); printTimings(timMIN, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "MAX "); printTimings(timMAX, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "AVG "); avgTimings(timSUM, TIM_END, H.nproc); printTimings(timSUM, TIM_END, sizeFmt); fprintf(stdout, "\n"); } } #endif if (H.mype == 0) { 
fprintf(stdout, "Average MC/s: %.3lf\n", (double)(avgCellPerCycle / nbCycle)); } #ifdef MPI #if FTI>0 FTI_Finalize(); #endif MPI_Finalize(); #endif return 0; }
int main ( int argc, char *argv[]){ int i; int state; int sizeOfDimension; int success = 1; int FTI_APP_RANK; herr_t status; threeD ***ptr = allocateLinearMemory(XSIZE, YSIZE, ZSIZE ); threeD *devPtr; int result; MPI_Init(&argc, &argv); result = FTI_Init(argv[1], MPI_COMM_WORLD); if (result == FTI_NREC) { exit(RECOVERY_FAILED); } int crash = atoi(argv[2]); int level = atoi(argv[3]); memset(&ptr[0][0][0],0, sizeof(threeD) * (XSIZE * YSIZE * ZSIZE)); int numGpus = getProperties(); MPI_Comm_rank(FTI_COMM_WORLD,&FTI_APP_RANK); setDevice(FTI_APP_RANK%numGpus); dictionary *ini = iniparser_load( argv[1] ); int grank; MPI_Comm_rank(MPI_COMM_WORLD,&grank); int nbHeads = (int)iniparser_getint(ini, "Basic:head", -1); int finalTag = (int)iniparser_getint(ini, "Advanced:final_tag", 3107); int nodeSize = (int)iniparser_getint(ini, "Basic:node_size", -1); int headRank = grank - grank%nodeSize; FTIT_complexType coordinateDef; FTIT_type threeDType; FTI_AddSimpleField( &coordinateDef, &FTI_INTG, offsetof( threeD, x),0, "X"); FTI_AddSimpleField( &coordinateDef, &FTI_INTG, offsetof( threeD, y),1, "y"); FTI_AddSimpleField( &coordinateDef, &FTI_INTG, offsetof( threeD, z),2, "z"); FTI_AddSimpleField( &coordinateDef, &FTI_INTG, offsetof( threeD, id),3, "id"); FTI_InitComplexType(&threeDType, &coordinateDef, 4 , sizeof(threeD), "ThreeD", NULL); if ( (nbHeads<0) || (nodeSize<0) ) { printf("wrong configuration (for head or node-size settings)! 
%d %d\n",nbHeads, nodeSize); MPI_Abort(MPI_COMM_WORLD, -1); } allocateMemory((void **) &devPtr, (XSIZE * YSIZE * ZSIZE*sizeof(threeD))); FTI_Protect(0, devPtr, (XSIZE * YSIZE * ZSIZE),threeDType); int dimLength[3] = {ZSIZE,YSIZE,XSIZE}; if (grank == 0) for ( i =0 ; i < 3; i++){ printf("Dimension is %d size is %d\n", dimLength[i], XSIZE*YSIZE*ZSIZE*sizeof(threeDType) / (1024*1024)); } FTI_DefineDataset(0, 3, dimLength , "GPU TOPOLOGY" , NULL); state = FTI_Status(); if ( state == INIT ){ executeKernel(devPtr); FTI_Checkpoint(1,level); if ( crash ) { if( nbHeads > 0 ) { int value = FTI_ENDW; MPI_Send(&value, 1, MPI_INT, headRank, finalTag, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); } MPI_Finalize(); exit(0); } }else{ result = FTI_Recover(); if (result != FTI_SCES) { exit(RECOVERY_FAILED); } hostCopy(devPtr, &ptr[0][0][0],(XSIZE * YSIZE * ZSIZE*sizeof(threeD))); } threeD ***validationMemory= allocateLinearMemory(XSIZE, YSIZE, ZSIZE ); initData(&validationMemory[0][0][0]); if (state == RESTART || state == KEEP) { int tmp; result = memcmp(&validationMemory[0][0][0], &ptr[0][0][0],(XSIZE * YSIZE * ZSIZE*sizeof(threeD))); MPI_Allreduce(&result, &tmp, 1, MPI_INT, MPI_SUM, FTI_COMM_WORLD); result = tmp; } deallocateLinearMemory(ZSIZE , ptr); deallocateLinearMemory(ZSIZE , validationMemory); freeCuda(devPtr); if (FTI_APP_RANK == 0 && (state == RESTART || state == KEEP)) { if (result == 0) { printf("[SUCCESSFUL]\n"); } else { printf("[NOT SUCCESSFUL]\n"); success=0; } } MPI_Barrier(FTI_COMM_WORLD); FTI_Finalize(); MPI_Finalize(); if (success == 1) return 0; else exit(DATA_CORRUPT); }