コード例 #1
0
ファイル: heatdis.c プロジェクト: leobago/fti
/*
 * Heat-distribution example driver using FTI for checkpointing.
 *
 * Command line:
 *   argv[1]  positive integer size factor used to derive the grid width M
 *   argv[2]  string handed to FTI_Init
 *
 * Two M x nbLines grids of doubles (h and g) are allocated per process,
 * registered with FTI_Protect together with the loop counter, and then
 * iterated with doWork() until ITER_TIMES is reached or the reduced error
 * drops below PRECISION.
 *
 * Returns 0 on normal completion and 1 when the arguments are unusable.
 */
int main(int argc, char *argv[])
{
    int rank, nbProcs, nbLines, i, M, arg;
    double wtime, *h, *g, memSize, localerror, globalerror = 1;
    size_t nbCells;

    MPI_Init(&argc, &argv);

    /* Both positional arguments are dereferenced below; refuse to run without them. */
    if (argc < 3) {
        fprintf(stderr, "Usage: %s <size> <fti-config>\n", (argc > 0) ? argv[0] : "heatdis");
        MPI_Finalize();
        return 1;
    }
    arg = atoi(argv[1]);
    if (arg <= 0) {
        /* A non-positive factor would yield an empty or undefined grid. */
        fprintf(stderr, "Invalid size argument '%s': expected a positive integer.\n", argv[1]);
        MPI_Finalize();
        return 1;
    }

    FTI_Init(argv[2], MPI_COMM_WORLD);

    MPI_Comm_size(FTI_COMM_WORLD, &nbProcs);
    MPI_Comm_rank(FTI_COMM_WORLD, &rank);

    M = (int)sqrt((double)(arg * 1024.0 * 512.0 * nbProcs)/sizeof(double));
    nbLines = (M / nbProcs)+3;
    nbCells = (size_t)M * (size_t)nbLines;

    /* Size by the element type (double), not by a pointer to it. */
    h = malloc(nbCells * sizeof *h);
    g = malloc(nbCells * sizeof *g);
    if (h == NULL || g == NULL) {
        fprintf(stderr, "%d: unable to allocate %d x %d grids.\n", rank, M, nbLines);
        free(h);
        free(g);
        /* Other ranks may have succeeded, so a collective shutdown could hang. */
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }
    initData(nbLines, M, rank, g);

    /* Floating-point arithmetic so the reported size is not truncated to whole MB. */
    memSize = (double)nbCells * 2.0 * sizeof(double) / (1024.0 * 1024.0);

    if (rank == 0) {
        printf("Local data size is %d x %d = %f MB (%d).\n", M, nbLines, memSize, arg);
        printf("Target precision : %f \n", PRECISION);
        printf("Maximum number of iterations : %d \n", ITER_TIMES);
    }

    /* Register the loop counter and both grids with FTI. */
    FTI_Protect(0, &i, 1, FTI_INTG);
    FTI_Protect(1, h, M*nbLines, FTI_DBLE);
    FTI_Protect(2, g, M*nbLines, FTI_DBLE);

    wtime = MPI_Wtime();
    for (i = 0; i < ITER_TIMES; i++) {
        /* The return value was stored but never inspected in this example. */
        FTI_Snapshot();
        localerror = doWork(nbProcs, rank, M, nbLines, g, h);
        if (((i%ITER_OUT) == 0) && (rank == 0)) {
            /* Prints the error from the most recent reduction, not this step's. */
            printf("Step : %d, error = %f\n", i, globalerror);
        }
        if ((i%REDUCE) == 0) {
            MPI_Allreduce(&localerror, &globalerror, 1, MPI_DOUBLE, MPI_MAX, FTI_COMM_WORLD);
        }
        if(globalerror < PRECISION) {
            break;
        }
    }
    if (rank == 0) {
        printf("Execution finished in %lf seconds.\n", MPI_Wtime() - wtime);
    }

    free(h);
    free(g);

    FTI_Finalize();
    MPI_Finalize();
    return 0;
}
コード例 #2
0
ファイル: diff_test_func.c プロジェクト: leobago/fti
/*
 * Set up the test state held in `info` and in several file-level variables.
 *
 * Steps, in order:
 *   - read final_tag, head and node_size from "config.fti" and derive headRank
 *   - pick the test mode from the TEST_MODE environment variable
 *     ("ICP" selects TEST_ICP, anything else or unset selects TEST_NOICP)
 *   - register the pattern, xor_info and nbuffer with FTI
 *   - choose the number of buffers (random 1..10 when FTI_Status() is 0,
 *     otherwise recovered through FTI_RecoverVar)
 *   - allocate the per-buffer bookkeeping arrays and fill the buffers
 *
 * info        structure to initialise
 * alloc_size  forwarded to allocate_buffers(); values below 101 are rejected
 */
void init( dcp_info_t * info, unsigned long alloc_size ) {

    MPI_Comm_rank(MPI_COMM_WORLD, &grank);

    dictionary* ini = NULL;

    if (access("config.fti", R_OK) == 0) {
        ini = iniparser_load("config.fti");
        if (ini == NULL) {
            WARN_MSG("failed to parse FTI config file!");
            exit(EXIT_FAILURE);
        }
    } else {
        EXIT_STD_ERR("cannot access FTI config file!");
    }

    finalTag = (int)iniparser_getint(ini, "Advanced:final_tag", 3107);
    numHeads = (int)iniparser_getint(ini, "Basic:head", 0);
    int nodeSize = (int)iniparser_getint(ini, "Basic:node_size", -1);

    // the dictionary is not needed past this point; release it
    iniparser_freedict(ini);
    ini = NULL;

    // node_size is used as a modulus below: 0 would divide by zero and the
    // -1 default means the key is missing from the configuration
    if ( nodeSize <= 0 ) EXIT_CFG_ERR("invalid or missing node_size");

    headRank = grank - grank%nodeSize;

    // only the exact value "ICP" enables ICP mode; every other case is NOICP
    const char* env = getenv( "TEST_MODE" );
    if( env != NULL && strcmp( env, "ICP" ) == 0 ) {
        info->test_mode = TEST_ICP;
        INFO_MSG("TEST MODE -> ICP");
    } else {
        info->test_mode = TEST_NOICP;
        INFO_MSG("TEST MODE -> NOICP");
    }

    //DBG_MSG("alloc_size: %lu",0,alloc_size);
    init_share();
    
    // init pattern
    pat = (uint32_t) rand();
    
    // protect pattern and xor_info
    FTI_InitType( &FTI_UI, UI_UNIT ); 
    FTI_Protect( PAT_ID, &pat, 1, FTI_UI );  
    FTI_InitType( &FTI_XOR_INFO, sizeof(xor_info_t) ); 
    FTI_Protect( XOR_INFO_ID, info->xor_info, NUM_DCKPT, FTI_XOR_INFO );  
    FTI_Protect( NBUFFER_ID, &info->nbuffer,  1, FTI_INTG );  

    // check that alloc_size is sufficiently large
    if ( alloc_size < 101 ) EXIT_CFG_ERR("insufficiant allocation size"); 
    
    // determine number of buffers
    usleep(5000*grank);
    srand(get_seed());
    if ( FTI_Status() == 0 ) {
        info->nbuffer = rand()%10+1;
    } else {
        FTI_RecoverVar( NBUFFER_ID );
    }

    // initialize structure
    info->buffer = (void**) malloc(info->nbuffer*sizeof(void*));
    info->size = (unsigned long*) malloc(info->nbuffer*sizeof(unsigned long));
    info->oldsize = (unsigned long*) malloc(info->nbuffer*sizeof(unsigned long));
    info->hash = (unsigned char**) malloc(info->nbuffer*sizeof(unsigned char*));
    if ( info->buffer == NULL || info->size == NULL
            || info->oldsize == NULL || info->hash == NULL ) {
        EXIT_STD_ERR("failed to allocate buffer bookkeeping arrays!");
    }
    int idx;
    for ( idx=0; idx<info->nbuffer; ++idx ) {
        info->buffer[idx] = NULL;
        info->hash[idx] = (unsigned char*) malloc(MD5_DIGEST_LENGTH);
        if ( info->hash[idx] == NULL ) {
            EXIT_STD_ERR("failed to allocate hash buffer!");
        }
    }
    allocate_buffers( info, alloc_size );
    generate_data( info );
    init_srand();
}
コード例 #3
0
ファイル: diff_test_func.c プロジェクト: leobago/fti
/*
 * Register every buffer in `info` with FTI.
 *
 * Buffer number k is protected under id k, with info->size[k] elements
 * of type FTI_CHAR.
 */
void protect_buffers( dcp_info_t *info ) {
    int id = 0;
    while ( id < info->nbuffer ) {
        FTI_Protect( id, info->buffer[id], info->size[id], FTI_CHAR );
        ++id;
    }
}
コード例 #4
0
ファイル: heatdis.c プロジェクト: leobago/fti
/*-------------------------------------------------------------------------*/
/*
 * Heat-distribution test driver using FTI_Snapshot.
 *
 * Arguments are validated by init(), which also sets `fail`; argv[1] is
 * handed to FTI_Init. The grid size factor is fixed at 4.
 *
 * Each iteration calls FTI_Snapshot() and aborts the run with return
 * value 1 if it reports anything other than FTI_SCES or FTI_DONE.
 * When `fail` is set the loop stops once i reaches ITER_STOP; otherwise
 * the final error is passed to verify() and its result is returned.
 */
/*-------------------------------------------------------------------------*/
int main(int argc, char** argv)
{
    int fail, rank, nbProcs, nbLines, i, M, arg;
    double wtime, *h, *g, memSize, localerror, globalerror = 1;
    size_t nbCells;

    if (init(argv, &fail)) {
        return 0;   //verify args
    }

    MPI_Init(&argc, &argv);
    FTI_Init(argv[1], MPI_COMM_WORLD);

    MPI_Comm_size(FTI_COMM_WORLD, &nbProcs);
    MPI_Comm_rank(FTI_COMM_WORLD, &rank);

    arg = 4;
    M = (int)sqrt((double)(arg * 1024.0 * 512.0 * nbProcs)/sizeof(double));
    nbLines = (M / nbProcs)+3;
    nbCells = (size_t)M * (size_t)nbLines;

    // size by the element type (double), not by a pointer to it
    h = malloc(nbCells * sizeof *h);
    g = malloc(nbCells * sizeof *g);
    if (h == NULL || g == NULL) {
        fprintf(stderr, "%d: unable to allocate %d x %d grids.\n", rank, M, nbLines);
        free(h);
        free(g);
        // other ranks may have succeeded, so a collective shutdown could hang
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }
    initData(nbLines, M, rank, g);

    // floating-point arithmetic so the reported size is not truncated to whole MB
    memSize = (double)nbCells * 2.0 * sizeof(double) / (1024.0 * 1024.0);

    if (rank == 0) {
        printf("Local data size is %d x %d = %f MB (%d).\n", M, nbLines, memSize, arg);
        printf("Target precision : %f \n", PRECISION);
        printf("Maximum number of iterations : %d \n", ITER_TIMES);
    }

    //adding variables to protect
    FTI_Protect(0, &i, 1, FTI_INTG);
    FTI_Protect(1, h, M*nbLines, FTI_DBLE);
    FTI_Protect(2, g, M*nbLines, FTI_DBLE);
    int iTmp = 0;
    wtime = MPI_Wtime();
    for (i = 0; i < ITER_TIMES; i++) {
        // remember i so a change made by the snapshot call can be detected
        iTmp = i;
        int checkpointed = FTI_Snapshot();
        // The previous test, !(x != FTI_SCES || x != FTI_DONE), could never be
        // true, so failures were silently ignored.
        if (checkpointed != FTI_SCES && checkpointed != FTI_DONE) {
            printf("%d: Snapshot failed! Returned %d.\n", rank, checkpointed);
            free(h);
            free(g);
            FTI_Finalize();
            MPI_Finalize();
            return 1;
        }
        else if (rank == 0 && checkpointed == FTI_DONE) {
            printf("Checkpoint made i = %d\n", i);
        }
        else if (rank == 0 && checkpointed == FTI_SCES && i != iTmp) {
            printf("Recovered! i = %d\n", i);
        }
        localerror = doWork(nbProcs, rank, M, nbLines, g, h);
        if (((i%ITER_OUT) == 0) && (rank == 0)) {
            printf("Step : %d, error = %f\n", i, globalerror);
        }
        if ((i%REDUCE) == 0) {
            MPI_Allreduce(&localerror, &globalerror, 1, MPI_DOUBLE, MPI_MAX, FTI_COMM_WORLD);
        }
        if (globalerror < PRECISION) {
            break;
        }
        if (fail && i >= ITER_STOP) {
            printf("%d: Stoped at i = %d.\n", rank, i);
            break;
        }
    }
    if (rank == 0) {
        printf("Execution finished in %lf seconds. Error = %f\n", MPI_Wtime() - wtime, globalerror);
    }

    int rtn = 0; //return value
    if (!fail) {
        rtn = verify(globalerror, rank);
    }

    free(h);
    free(g);

    FTI_Finalize();
    MPI_Finalize();
    return rtn;
}
コード例 #5
0
ファイル: main.c プロジェクト: RWTH-OS/Hydro
int
main(int argc, char **argv) {
  char myhost[256];
  real_t dt = 0;
  int nvtk = 0;
  char outnum[80];
  int time_output = 0;
  long flops = 0;

  // real_t output_time = 0.0;
  real_t next_output_time = 0;
  double start_time = 0, end_time = 0;
  double start_iter = 0, end_iter = 0;
  double elaps = 0;
  struct timespec start, end;
  double cellPerCycle = 0;
  double avgCellPerCycle = 0;
  long nbCycle = 0;

  // array of timers to profile the code
  memset(functim, 0, TIM_END * sizeof(functim[0]));

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  process_args(argc, argv, &H);
  hydro_init(&H, &Hv);

  if (H.mype == 0)
    fprintf(stdout, "Hydro starts in %s precision.\n", ((sizeof(real_t) == sizeof(double))? "double": "single"));
  gethostname(myhost, 255);
  if (H.mype == 0) {
    fprintf(stdout, "Hydro: Main process running on %s\n", myhost);
  }

#ifdef _OPENMP
  if (H.mype == 0) {
    fprintf(stdout, "Hydro:    OpenMP mode ON\n");
    fprintf(stdout, "Hydro: OpenMP %d max threads\n", omp_get_max_threads());
    fprintf(stdout, "Hydro: OpenMP %d num threads\n", omp_get_num_threads());
    fprintf(stdout, "Hydro: OpenMP %d num procs\n", omp_get_num_procs());
  }
#endif
#ifdef MPI
  if (H.mype == 0) {
    fprintf(stdout, "Hydro: MPI run with %d procs\n", H.nproc);
  }
#else
  fprintf(stdout, "Hydro: standard build\n");
#endif


  // PRINTUOLD(H, &Hv);
#ifdef MPI
  if (H.nproc > 1)
#if FTI>0
    MPI_Barrier(FTI_COMM_WORLD);
#endif
#if FTI==0
    MPI_Barrier(MPI_COMM_WORLD);
#endif
#endif

  if (H.dtoutput > 0) {
    // outputs are in physical time not in time steps
    time_output = 1;
    next_output_time = next_output_time + H.dtoutput;
  }

  if (H.dtoutput > 0 || H.noutput > 0)
    vtkfile(++nvtk, H, &Hv);

  if (H.mype == 0)
    fprintf(stdout, "Hydro starts main loop.\n");

  //pre-allocate memory before entering in loop
  //For godunov scheme
  start = cclock();
  start = cclock();
  allocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov);
  compute_deltat_init_mem(H, &Hw_deltat, &Hvw_deltat);
  end = cclock();
#ifdef MPI
#if FTI==1
  FTI_Protect(0,functim, TIM_END,FTI_DBLE);
  FTI_Protect(1,&nvtk,1,FTI_INTG);
  FTI_Protect(2,&next_output_time,1,FTI_DBLE);
  FTI_Protect(3,&dt,1,FTI_DBLE);
  FTI_Protect(4,&MflopsSUM,1,FTI_DBLE);
  FTI_Protect(5,&nbFLOPS,1,FTI_LONG);
  FTI_Protect(6,&(H.nstep),1,FTI_INTG);
  FTI_Protect(7,&(H.t),1,FTI_DBLE);
  FTI_Protect(8,Hv.uold,H.nvar * H.nxt * H.nyt,FTI_DBLE);
#endif
#endif
  if (H.mype == 0) fprintf(stdout, "Hydro: init mem %lfs\n", ccelaps(start, end));
  // we start timings here to avoid the cost of initial memory allocation
  start_time = dcclock();

  while ((H.t < H.tend) && (H.nstep < H.nstepmax)) {
    //system("top -b -n1");
    // reset perf counter for this iteration
    flopsAri = flopsSqr = flopsMin = flopsTra = 0;
    start_iter = dcclock();
    outnum[0] = 0;
    if ((H.nstep % 2) == 0) {
      dt = 0;
      // if (H.mype == 0) fprintf(stdout, "Hydro computes deltat.\n");
      start = cclock();
      compute_deltat(&dt, H, &Hw_deltat, &Hv, &Hvw_deltat);
      end = cclock();
      functim[TIM_COMPDT] += ccelaps(start, end);
      if (H.nstep == 0) {
        dt = dt / 2.0;
	if (H.mype == 0) fprintf(stdout, "Hydro computes initial deltat: %le\n", dt);
      }
#ifdef MPI
      if (H.nproc > 1) {
        real_t dtmin;
        // printf("pe=%4d\tdt=%lg\n",H.mype, dt);
#if FTI==0
	if (sizeof(real_t) == sizeof(double)) {
	    MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
	  } else {
	    MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);
	  }
#endif
#if FTI>0
	if (sizeof(real_t) == sizeof(double)) {
	  MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, FTI_COMM_WORLD);
	} else {
	  MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, FTI_COMM_WORLD);
	}
#endif
        dt = dtmin;
      }
#endif
    }
    // dt = 1.e-3;
    // if (H.mype == 1) fprintf(stdout, "Hydro starts godunov.\n");
    if ((H.nstep % 2) == 0) {
      hydro_godunov(1, dt, H, &Hv, &Hw_godunov, &Hvw_godunov);
      //            hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw);
    } else {
      hydro_godunov(2, dt, H, &Hv, &Hw_godunov, &Hvw_godunov);
      //            hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw);
    }
    end_iter = dcclock();
    cellPerCycle = (double) (H.globnx * H.globny) / (end_iter - start_iter) / 1000000.0L;
    avgCellPerCycle += cellPerCycle;
    nbCycle++;

    H.nstep++;
    H.t += dt;
    {
      real_t iter_time = (real_t) (end_iter - start_iter);
#ifdef MPI
      long flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t;
      start = cclock();
#if FTI==0
      MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
#endif
#if FTI>0
      MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
      MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
      MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
      MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, FTI_COMM_WORLD);
#endif
      //       if (H.mype == 1)
      //        printf("%ld %ld %ld %ld %ld %ld %ld %ld \n", flopsAri, flopsSqr, flopsMin, flopsTra, flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t);
      flops = flopsAri_t * FLOPSARI + flopsSqr_t * FLOPSSQR + flopsMin_t * FLOPSMIN + flopsTra_t * FLOPSTRA;
      end = cclock();
      functim[TIM_ALLRED] += ccelaps(start, end);
#else
      flops = flopsAri * FLOPSARI + flopsSqr * FLOPSSQR + flopsMin * FLOPSMIN + flopsTra * FLOPSTRA;
#endif
      nbFLOPS++;

      if (flops > 0) {
        if (iter_time > 1.e-9) {
          double mflops = (double) flops / (double) 1.e+6 / iter_time;
          MflopsSUM += mflops;
          sprintf(outnum, "%s {%.2f Mflops %ld Ops} (%.3fs)", outnum, mflops, flops, iter_time);
        }
      } else {
        sprintf(outnum, "%s (%.3fs)", outnum, iter_time);
      }
    }
    if (time_output == 0 && H.noutput > 0) {
      if ((H.nstep % H.noutput) == 0) {
        vtkfile(++nvtk, H, &Hv);
        sprintf(outnum, "%s [%04d]", outnum, nvtk);
      }
    } else {
      if (time_output == 1 && H.t >= next_output_time) {
        vtkfile(++nvtk, H, &Hv);
        next_output_time = next_output_time + H.dtoutput;
        sprintf(outnum, "%s [%04d]", outnum, nvtk);
      }
    }
    if (H.mype == 0) {
	    fprintf(stdout, "--> step=%4d, %12.5e, %10.5e %.3lf MC/s%s\n", H.nstep, H.t, dt, cellPerCycle, outnum);
      fflush(stdout);
    }
#ifdef MPI
#if FTI==1
    FTI_Snapshot();     
#endif
#endif
  } // while
  end_time = dcclock();

  // Deallocate work spaces
  deallocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov);
  compute_deltat_clean_mem(H, &Hw_deltat, &Hvw_deltat);

  hydro_finish(H, &Hv);
  elaps = (double) (end_time - start_time);
  timeToString(outnum, elaps);
  if (H.mype == 0) {
    fprintf(stdout, "Hydro ends in %ss (%.3lf) <%.2lf MFlops>.\n", outnum, elaps, (float) (MflopsSUM / nbFLOPS));
    fprintf(stdout, "       ");
  }
  if (H.nproc == 1) {
    int sizeFmt = sizeLabel(functim, TIM_END);
    printTimingsLabel(TIM_END, sizeFmt);
    fprintf(stdout, "\n");
    if (sizeof(real_t) == sizeof(double)) {
      fprintf(stdout, "PE0_DP ");
    } else {
      fprintf(stdout, "PE0_SP ");
    }
    printTimings(functim, TIM_END, sizeFmt);
    fprintf(stdout, "\n");
    fprintf(stdout, "%%      ");
    percentTimings(functim, TIM_END);
    printTimings(functim, TIM_END, sizeFmt);
    fprintf(stdout, "\n");
  }
#ifdef MPI
  if (H.nproc > 1) {
    double timMAX[TIM_END];
    double timMIN[TIM_END];
    double timSUM[TIM_END];
#if FTI==0
    MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
    MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#endif
#if FTI>0
    MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, FTI_COMM_WORLD);
    MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, FTI_COMM_WORLD);
    MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, FTI_COMM_WORLD);
#endif
    if (H.mype == 0) {
      int sizeFmt = sizeLabel(timMAX, TIM_END);
      printTimingsLabel(TIM_END, sizeFmt);
      fprintf(stdout, "\n");
      fprintf(stdout, "MIN ");
      printTimings(timMIN, TIM_END, sizeFmt);
      fprintf(stdout, "\n");
      fprintf(stdout, "MAX ");
      printTimings(timMAX, TIM_END, sizeFmt);
      fprintf(stdout, "\n");
      fprintf(stdout, "AVG ");
      avgTimings(timSUM, TIM_END, H.nproc);
      printTimings(timSUM, TIM_END, sizeFmt);
      fprintf(stdout, "\n");
    }
  }
#endif
  if (H.mype == 0) {
	  fprintf(stdout, "Average MC/s: %.3lf\n", (double)(avgCellPerCycle / nbCycle));
  }

#ifdef MPI
#if FTI>0
  FTI_Finalize();
#endif
  MPI_Finalize();
#endif
  return 0;
}
コード例 #6
0
ファイル: hd5GPU.c プロジェクト: leobago/fti
/*
 * Checkpoint/recovery test for a device-resident 3-D array.
 *
 * Command line:
 *   argv[1]  configuration file, passed to FTI_Init and iniparser_load
 *   argv[2]  integer "crash" flag; non-zero ends the run right after the
 *            checkpoint has been taken
 *   argv[3]  checkpoint level forwarded to FTI_Checkpoint
 *
 * When FTI_Status() equals INIT the kernel is executed and a checkpoint is
 * written. Otherwise the data is recovered, copied with hostCopy() and,
 * for RESTART/KEEP, compared against freshly initialised reference data.
 *
 * Returns 0 on success. Exits with RECOVERY_FAILED when FTI_Init or
 * FTI_Recover report failure, with DATA_CORRUPT when the comparison
 * fails, and with EXIT_FAILURE when too few arguments are given.
 */
int main ( int argc, char *argv[]){
  int i;
  int state;
  int success = 1;
  int FTI_APP_RANK;
  threeD ***ptr;
  threeD *devPtr;
  int result;

  /* argv[1]..argv[3] are all dereferenced below. */
  if (argc < 4) {
    fprintf(stderr, "Usage: %s <config-file> <crash> <level>\n", (argc > 0) ? argv[0] : "hd5GPU");
    return EXIT_FAILURE;
  }

  ptr = allocateLinearMemory(XSIZE, YSIZE, ZSIZE );

  MPI_Init(&argc, &argv);
  result = FTI_Init(argv[1], MPI_COMM_WORLD);
  if (result == FTI_NREC) {
    exit(RECOVERY_FAILED);
  }
  int crash = atoi(argv[2]);
  int level = atoi(argv[3]);

  memset(&ptr[0][0][0],0, sizeof(threeD) * (XSIZE * YSIZE * ZSIZE));

  int numGpus = getProperties();
  MPI_Comm_rank(FTI_COMM_WORLD,&FTI_APP_RANK);

  setDevice(FTI_APP_RANK%numGpus);

  int grank;    
  MPI_Comm_rank(MPI_COMM_WORLD,&grank);

  dictionary *ini = iniparser_load( argv[1] );
  if (ini == NULL) {
    printf("failed to parse configuration file %s\n", argv[1]);
    MPI_Abort(MPI_COMM_WORLD, -1);
  }
  int nbHeads = (int)iniparser_getint(ini, "Basic:head", -1); 
  int finalTag = (int)iniparser_getint(ini, "Advanced:final_tag", 3107);
  int nodeSize = (int)iniparser_getint(ini, "Basic:node_size", -1);
  /* All settings have been read; release the dictionary. */
  iniparser_freedict(ini);

  /* Validate before nodeSize is used as a modulus (0 would divide by zero). */
  if ( (nbHeads<0) || (nodeSize<=0) ) {
    printf("wrong configuration (for head or node-size settings)! %d %d\n",nbHeads, nodeSize);
    MPI_Abort(MPI_COMM_WORLD, -1);
  }
  int headRank = grank - grank%nodeSize;

  /* Describe the four int fields of threeD to FTI. */
  FTIT_complexType coordinateDef;
  FTIT_type threeDType;
  FTI_AddSimpleField( &coordinateDef, &FTI_INTG, offsetof( threeD, x),0, "X"); 
  FTI_AddSimpleField( &coordinateDef, &FTI_INTG, offsetof( threeD, y),1, "y"); 
  FTI_AddSimpleField( &coordinateDef, &FTI_INTG, offsetof( threeD, z),2, "z"); 
  FTI_AddSimpleField( &coordinateDef, &FTI_INTG, offsetof( threeD, id),3, "id"); 
  FTI_InitComplexType(&threeDType, &coordinateDef, 4 , sizeof(threeD), "ThreeD", NULL);

  allocateMemory((void **) &devPtr, (XSIZE * YSIZE * ZSIZE*sizeof(threeD)));
  FTI_Protect(0, devPtr,  (XSIZE * YSIZE * ZSIZE),threeDType);
  int dimLength[3] = {ZSIZE,YSIZE,XSIZE};
  if (grank == 0) {
    /* Size of the protected data in MB: the element is threeD, not the FTI
     * type descriptor, and the value is a size_t so it needs %zu. */
    size_t sizeMB = sizeof(threeD) * XSIZE * YSIZE * ZSIZE / (1024*1024);
    for ( i =0 ; i < 3; i++){
      printf("Dimension is %d size is %zu\n", dimLength[i], sizeMB);
    }
  }
  FTI_DefineDataset(0, 3, dimLength , "GPU TOPOLOGY" , NULL);
  state = FTI_Status();
  if ( state == INIT ){
    executeKernel(devPtr);
    FTI_Checkpoint(1,level);
    if ( crash ) {
      /* With head processes configured, send FTI_ENDW to this node's head
       * rank before leaving. */
      if( nbHeads > 0 ) { 
        int value = FTI_ENDW;
        MPI_Send(&value, 1, MPI_INT, headRank, finalTag, MPI_COMM_WORLD);
        MPI_Barrier(MPI_COMM_WORLD);
      }
      MPI_Finalize();
      exit(0);
    }
  }else{
    result = FTI_Recover();
    if (result != FTI_SCES) {
      exit(RECOVERY_FAILED);
    }
    hostCopy(devPtr, &ptr[0][0][0],(XSIZE * YSIZE * ZSIZE*sizeof(threeD)));
  }
  threeD ***validationMemory= allocateLinearMemory(XSIZE, YSIZE, ZSIZE );
  initData(&validationMemory[0][0][0]);

  if (state == RESTART || state == KEEP) {
    int tmp;
    result =  memcmp(&validationMemory[0][0][0], &ptr[0][0][0],(XSIZE * YSIZE * ZSIZE*sizeof(threeD)));
    /* Combine the per-process comparison results. */
    MPI_Allreduce(&result, &tmp, 1, MPI_INT, MPI_SUM, FTI_COMM_WORLD);
    result = tmp;

  }

  deallocateLinearMemory(ZSIZE , ptr);
  deallocateLinearMemory(ZSIZE , validationMemory);
  freeCuda(devPtr);

  if (FTI_APP_RANK == 0 && (state == RESTART || state == KEEP)) {
    if (result == 0) {
      printf("[SUCCESSFUL]\n");
    } else {
      printf("[NOT SUCCESSFUL]\n");
      success=0;
    }
  }

  MPI_Barrier(FTI_COMM_WORLD);
  FTI_Finalize();
  MPI_Finalize();

  if (success == 1)
    return 0;
  else
    exit(DATA_CORRUPT);
}