Exemple #1
0
/*
 * MACSIO driver entry point.
 *
 * Sequence of work, as visible in this function:
 *   1. Pre-scan argv for the literal "exercise_scr" flag.
 *   2. When built with HAVE_MPI: initialize MPI (and SCR if requested and
 *      built with HAVE_SCR), duplicate MPI_COMM_WORLD into MACSIO_MAIN_Comm
 *      and record the communicator size and rank.
 *   3. Start the "main" timer and set up the stderr and main logs.
 *   4. Parse the command line into a JSON object and attach it, together
 *      with the parallel (size/rank) information, to the main JSON object.
 *   5. Run main_read() when "read_path" is anything other than "null",
 *      otherwise main_write().
 *   6. Stop the timer, optionally write the timings file, and tear down
 *      JSON, timing, logging, SCR and MPI state.
 *
 * argc, argv  command-line vector; handed to MPI_Init and
 *             ProcessCommandLine, and forwarded (with argi as set by
 *             ProcessCommandLine) to main_read()/main_write().
 *
 * Always returns 0; see the #warning just above the return statement.
 */
int
main(int argc, char *argv[])
{
    json_object *main_obj = json_object_new_object();
    json_object *parallel_obj = json_object_new_object();
    json_object *clargs_obj = 0;
    MACSIO_TIMING_GroupMask_t main_grp;
    MACSIO_TIMING_TimerId_t main_tid;
    int i, argi, exercise_scr = 0;

    /* quick pre-scan for scr cl flag; needed here because SCR_Init() is
       called right after MPI_Init(), before the full command line is parsed */
    for (i = 0; i < argc && !exercise_scr; i++)
        exercise_scr = !strcmp("exercise_scr", argv[i]);

#warning SHOULD WE BE USING MPI-3 API
#ifdef HAVE_MPI
    MPI_Init(&argc, &argv);
#ifdef HAVE_SCR
#warning SANITY CHECK WITH MIFFPP
    if (exercise_scr)
        SCR_Init();
#endif
    MPI_Comm_dup(MPI_COMM_WORLD, &MACSIO_MAIN_Comm);
    /* MPI_Errhandler_set() was deprecated in MPI-2 and removed in MPI-3;
       MPI_Comm_set_errhandler() is the drop-in replacement. */
    MPI_Comm_set_errhandler(MACSIO_MAIN_Comm, MPI_ERRORS_RETURN);
    MPI_Comm_size(MACSIO_MAIN_Comm, &MACSIO_MAIN_Size);
    MPI_Comm_rank(MACSIO_MAIN_Comm, &MACSIO_MAIN_Rank);
    mpi_errno = MPI_SUCCESS;
#endif
    errno = 0;

    /* Time everything from here until just after the read/write test */
    main_grp = MACSIO_TIMING_GroupMask("MACSIO main()");
    main_tid = MT_StartTimer("main", main_grp, MACSIO_TIMING_ITER_AUTO);

    MACSIO_LOG_StdErr = MACSIO_LOG_LogInit(MACSIO_MAIN_Comm, 0, 0, 0, 0);

    /* Process the command line and put the results in the problem */
    clargs_obj = ProcessCommandLine(argc, argv, &argi);
    json_object_object_add(main_obj, "clargs", clargs_obj);

    /* strncpy() does not NUL-terminate when the source fills the whole
       destination, so terminate explicitly. */
    strncpy(MACSIO_UTILS_UnitsPrefixSystem, JsonGetStr(clargs_obj, "units_prefix_system"),
        sizeof(MACSIO_UTILS_UnitsPrefixSystem));
    MACSIO_UTILS_UnitsPrefixSystem[sizeof(MACSIO_UTILS_UnitsPrefixSystem) - 1] = '\0';

    MACSIO_LOG_MainLog = MACSIO_LOG_LogInit(MACSIO_MAIN_Comm,
        JsonGetStr(clargs_obj, "log_file_name"),
        JsonGetInt(clargs_obj, "log_line_length"),
        JsonGetInt(clargs_obj, "log_line_cnt/0"),
        JsonGetInt(clargs_obj, "log_line_cnt/1"));

#warning THESE INITIALIZATIONS SHOULD BE IN MACSIO_LOG
    MACSIO_LOG_DebugLevel = JsonGetInt(clargs_obj, "debug_level");

    /* Setup parallel information */
    json_object_object_add(parallel_obj, "mpi_size", json_object_new_int(MACSIO_MAIN_Size));
    json_object_object_add(parallel_obj, "mpi_rank", json_object_new_int(MACSIO_MAIN_Rank));
    json_object_object_add(main_obj, "parallel", parallel_obj);

#warning SHOULD WE INCLUDE TOP-LEVEL INFO ON VAR NAMES AND WHETHER THEYRE RESTRICTED
#warning CREATE AN IO CONTEXT OBJECT
    /* Acquire an I/O context handle from the plugin */

    /* Do a read or write test: any read_path other than "null" selects read */
    if (strcmp(JsonGetStr(clargs_obj, "read_path"),"null"))
        main_read(argi, argc, argv, main_obj);
    else
        main_write(argi, argc, argv, main_obj);

    /* stop total timer */
    MT_StopTimer(main_tid);

    /* Write timings data file if requested (non-empty file name) */
    if (strlen(JsonGetStr(clargs_obj, "timings_file_name")))
        write_timings_file(JsonGetStr(clargs_obj, "timings_file_name"));

    MACSIO_TIMING_ClearTimers(MACSIO_TIMING_ALL_GROUPS);

#warning ATEXIT THESE
    /* clargs_obj and parallel_obj were added to main_obj above, so this one
       put is the only release done here; clargs_obj is not used afterwards */
    if (json_object_put(main_obj) != 1)
    {
        MACSIO_LOG_MSG(Info, ("Unable to free main JSON object"));
    }
    MACSIO_TIMING_GroupMask(0);
    MACSIO_TIMING_ReduceTimers(MACSIO_MAIN_Comm, -1);
    json_object_apath_get_string(0,0); /* free circ cache */
    MACSIO_LOG_LogFinalize(MACSIO_LOG_MainLog);
    MACSIO_LOG_LogFinalize(MACSIO_LOG_StdErr);

#ifdef HAVE_SCR
    if (exercise_scr)
        SCR_Finalize();
#endif

#ifdef HAVE_MPI
    /* Only finalize MPI if it was actually initialized */
    {   int result;
        if ((MPI_Initialized(&result) == MPI_SUCCESS) && result)
            MPI_Finalize();
    }
#endif

#warning FIX RETVAL OF MAIN TO BE NON-ZERO WHEN ERRORS OCCUR
    return (0);
}
Exemple #2
0
/*
 * Write one complete SCR checkpoint consisting of all of this rank's files.
 *
 * Brackets the writes with SCR_Start_checkpoint()/SCR_Complete_checkpoint(),
 * routes each file name through SCR_Route_file(), writes the buffer with
 * write_checkpoint() and then fsync()s and close()s the file. A file counts
 * as valid only if every one of those steps succeeded; the checkpoint is
 * reported valid to SCR only if all files were valid. Afterwards rank 0
 * prints a completion line and the file-scope `timestep` is advanced by one.
 *
 * rank       MPI rank of the caller, used in diagnostics
 * num_files  number of entries in files/bufs/filesizes
 * files      checkpoint file names to route
 * bufs       data to write, one buffer per file
 * filesizes  size in bytes of each buffer
 *
 * Returns the number of files that were opened successfully.
 */
static int checkpoint_all_files(int rank, int num_files, char **files, char **bufs, size_t *filesizes)
{
  int opened = 0;
  int all_valid = 1;
  int i;

  int scr_retval = SCR_Start_checkpoint();
  if (scr_retval != SCR_SUCCESS) {
    printf("%d: failed calling SCR_Start_checkpoint(): %d: @%s:%d\n",
           rank, scr_retval, __FILE__, __LINE__
    );
  }

  for (i = 0; i < num_files; i++) {
    int rc;
    int valid = 0;
    char file[2094];

    scr_retval = SCR_Route_file(files[i], file);
    if (scr_retval != SCR_SUCCESS) {
      printf("%d: failed calling SCR_Route_file(): %d: @%s:%d\n",
             rank, scr_retval, __FILE__, __LINE__
      );
      /* `file` was not filled in, so there is nothing sensible to open */
      all_valid = 0;
      continue;
    }

    int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
    if (fd_me >= 0) { /* 0 is a legal descriptor; only negative means failure */
      opened++;
      valid = 1;

      // write the checkpoint
      rc = write_checkpoint(fd_me, timestep, bufs[i], filesizes[i]);
      if (rc < 0) { valid = 0; }

      rc = fsync(fd_me);
      if (rc < 0) { valid = 0; }

      // make sure the close is without error
      rc = close(fd_me);
      if (rc < 0) { valid = 0; }
    }
    if (!valid) { all_valid = 0; }
  }

  scr_retval = SCR_Complete_checkpoint(all_valid);
  if (scr_retval != SCR_SUCCESS) {
    printf("%d: failed calling SCR_Complete_checkpoint(): %d: @%s:%d\n",
           rank, scr_retval, __FILE__, __LINE__
    );
  }
  if (rank == 0) { printf("Completed checkpoint %d.\n", timestep); fflush(stdout); }

  timestep++;
  return opened;
}

/*
 * SCR API test driver that writes a varying number of files per rank.
 *
 * Usage: test_api_file [filesize times sleep_secs] path_to_stdout
 *
 * stdout is redirected (append mode) to path_to_stdout. Each rank owns
 * (rank % 4) checkpoint files. On start-up the program tries to read back and
 * verify any existing checkpoint, checks that all ranks agree on the
 * timestep, writes one untimed priming checkpoint and then `times` timed
 * checkpoints, sleeping `seconds` between them, and finally reports the
 * min/max/average bandwidth from rank 0.
 *
 * Reads and writes the file-scope variables filesize, times, seconds and
 * timestep, which are declared outside this function.
 *
 * Returns 0 on success, 1 on usage/corruption/timestep/allocation errors and
 * -1 if SCR_Init() fails.
 */
int main (int argc, char* argv[])
{
  char *path_to_stdout = NULL;
  int scr_retval;
  /* check that we got an appropriate number of arguments */
  if (argc == 2) {
    path_to_stdout = argv[1];
  }
  else if(argc == 5){
    filesize = (size_t) atol(argv[1]);
    times = atoi(argv[2]);
    seconds = atoi(argv[3]);
    path_to_stdout = argv[4];
  }
  else{
    printf("Usage: test_api_file [filesize times sleep_secs path_to_stdout]\n");
    printf("OR: test_api_file [ path_to_stdout]\n");
    exit(1);
  }
  
  MPI_Init(&argc, &argv);

  int rank = -1, size = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  /* open file for stdout */
  printf("new stdout filename: \"%s\"\n", path_to_stdout);
  fflush(stdout);
  if (freopen(path_to_stdout, "a+", stdout) == NULL) {
    /* stdout is unusable after a failed freopen(), so report on stderr */
    fprintf(stderr, "%d: failed to reopen stdout as \"%s\"\n", rank, path_to_stdout);
    MPI_Abort(MPI_COMM_WORLD, 1);
    return 1;
  }
  MPI_Barrier(MPI_COMM_WORLD);

  /* time how long it takes to get through init */
  MPI_Barrier(MPI_COMM_WORLD);

  double init_start = MPI_Wtime();
  if (SCR_Init() != SCR_SUCCESS){
    printf("FAILED INITIALIZING SCR\n");
    fclose(stdout);
    return -1;
  }
  double init_end = MPI_Wtime();
  double secs = init_end - init_start;

  MPI_Barrier(MPI_COMM_WORLD);

  double secsmin, secsmax, secssum;
  MPI_Reduce(&secs, &secsmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  MPI_Reduce(&secs, &secsmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  MPI_Reduce(&secs, &secssum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  if (rank == 0) { printf("Init: Min %8.6f s\tMax %8.6f s\tAvg %8.6f s\n", secsmin, secsmax, secssum/size); }

  MPI_Barrier(MPI_COMM_WORLD);

  /* each rank owns 0..3 files; ranks that are a multiple of 4 own none */
  int num_files = rank % 4;
  char** files = NULL;
  char** bufs  = NULL;
  size_t* filesizes = NULL;
  char* buf = NULL;
  if (num_files > 0) {
    files = (char**) malloc(num_files * sizeof(char*));
    bufs  = (char**) malloc(num_files * sizeof(char*));
    filesizes = (size_t*) malloc(num_files * sizeof(size_t));
    if (files == NULL || bufs == NULL || filesizes == NULL) {
      printf("%d: failed to allocate checkpoint tables\n", rank);
      fflush(stdout);
      MPI_Abort(MPI_COMM_WORLD, 1);
      return 1;
    }
  }

  int i;
  for (i=0; i < num_files; i++) {
    // route our checkpoint file
    char name[256];
    snprintf(name, sizeof(name), "rank_%d.%d.ckpt", rank, i);
    files[i] = strdup(name);
    filesizes[i] = filesize + rank + 2*i;
    bufs[i] = (char*) malloc(filesizes[i]);
    if (files[i] == NULL || bufs[i] == NULL) {
      printf("%d: failed to allocate checkpoint buffer %d\n", rank, i);
      fflush(stdout);
      MPI_Abort(MPI_COMM_WORLD, 1);
      return 1;
    }
  }
  if (num_files > 0) {
    /* scratch read buffer; the last file is the largest because the sizes
       grow with i, so this fits every file */
    buf = (char*) malloc(filesizes[num_files-1]);
    if (buf == NULL) {
      printf("%d: failed to allocate read buffer\n", rank);
      fflush(stdout);
      MPI_Abort(MPI_COMM_WORLD, 1);
      return 1;
    }
  }

  // check each of our checkpoint files
  int found_checkpoint = 1;
  for (i=0; i < num_files; i++) {
    char file[2094];
    scr_retval = SCR_Route_file(files[i], file);
    if (scr_retval != SCR_SUCCESS) {
      printf("%d: failed calling SCR_Route_file(): %d: @%s:%d\n",
             rank, scr_retval, __FILE__, __LINE__
      );
      /* `file` was not filled in; treat this file as not found */
      found_checkpoint = 0;
      continue;
    }
    if (read_checkpoint(file, &timestep, buf, filesizes[i])) {
      // check that contents are good
      if (!check_buffer(buf, filesizes[i], rank + 2*i, timestep)) {
        printf("!!!!CORRUPTION!!!! Rank %d, File %s: Invalid value in buffer\n", rank, file);
        fflush(stdout);
        fclose(stdout);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
      }
    } else {
      found_checkpoint = 0;
    }
  }

  // check that everyone found their checkpoint files ok
  int all_found_checkpoint = 0;
  MPI_Allreduce(&found_checkpoint, &all_found_checkpoint, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
  if (!all_found_checkpoint && rank == 0) {
    printf("At least one rank (perhaps all) did not find its checkpoint\n");
    fflush(stdout);
  }

  // check that everyone is at the same timestep
  int timestep_and, timestep_or;
  int timestep_a, timestep_o;
  if (num_files > 0) {
    timestep_a = timestep;
    timestep_o = timestep;
  } else {
    /* ranks without files contribute the identity of each reduction:
       all bits set for AND, no bits set for OR */
    timestep_a = ~0;
    timestep_o = 0;
  }
  MPI_Allreduce(&timestep_a, &timestep_and, 1, MPI_INT, MPI_BAND, MPI_COMM_WORLD);
  MPI_Allreduce(&timestep_o, &timestep_or,  1, MPI_INT, MPI_BOR,  MPI_COMM_WORLD);
  if (timestep_and != timestep_or) {
    printf("%d: Timesteps don't agree: timestep %d\n", rank, timestep);
    fflush(stdout);
    fclose(stdout);
    return 1;
  }
  timestep = timestep_and;

  // make up some data for the next checkpoint
  for (i=0; i < num_files; i++) {
    init_buffer(bufs[i], filesizes[i], rank + 2*i, timestep);
  }

  timestep++;

  // prime system once before timing
  checkpoint_all_files(rank, num_files, files, bufs, filesizes);
  MPI_Barrier(MPI_COMM_WORLD);

  if (times > 0) {
    int t;
    int count = 0;
    double time_start = MPI_Wtime();
    for(t=0; t < times; t++) {
      count += checkpoint_all_files(rank, num_files, files, bufs, filesizes);
      if (seconds > 0) {
        if (rank == 0) { printf("Sleeping for %d seconds... \n", seconds); fflush(stdout); }
        sleep(seconds);
      }
    }
    double time_end = MPI_Wtime();
    /* floating-point arithmetic so sub-megabyte amounts are not truncated to 0 */
    double bw = ((double) filesize * count / (1024.0 * 1024.0)) / (time_end - time_start);
    
    MPI_Barrier(MPI_COMM_WORLD);
    
    double bwmin, bwmax, bwsum;
    MPI_Reduce(&bw, &bwmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    MPI_Reduce(&bw, &bwmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&bw, &bwsum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) { printf("FileIO: Min %7.2f MB/s\tMax %7.2f MB/s\tAvg %7.2f MB/s\n", bwmin, bwmax, bwsum/size); }
  }

  /* free(NULL) is a no-op, so no guards are needed */
  free(buf);
  buf = NULL;
  for (i=0; i < num_files; i++) {
    free(bufs[i]);
    bufs[i] = NULL;
    free(files[i]);
    files[i] = NULL;
  }
  free(files);
  files = NULL;
  free(bufs);
  bufs = NULL;
  free(filesizes);
  filesizes = NULL;

  scr_retval = SCR_Finalize();
  if (scr_retval != SCR_SUCCESS) {
    printf("%d: failed calling SCR_Finalize(): %d: @%s:%d\n",
           rank, scr_retval, __FILE__, __LINE__
    );
  }
  MPI_Finalize();

  fclose(stdout);
  return 0;
}
Exemple #3
0
int
main(int argc,char *argv[])
{
  int    i,j,k,nn;
  int    mx,my,mz,it;
  float  gosa;
  double cpu,cpu0,cpu1,flop,target;

  target= 60.0;
  omega= 0.8;
  mx= MX0-1;
  my= MY0-1;
  mz= MZ0-1;
  ndx= NDX0;
  ndy= NDY0;
  ndz= NDZ0;

  MPI_Init(&argc, &argv);
#ifdef SCR_ENABLE
  SCR_Init();
#endif
  
  MPI_Comm_size(MPI_COMM_WORLD, &npe);
  MPI_Comm_rank(MPI_COMM_WORLD, &id);

  initcomm(ndx,ndy,ndz);
  it= initmax(mx,my,mz);

  /*
   *    Initializing matrixes
   */
  initmt(mx,it);

  if(id==0){
    printf("Sequential version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0);
    printf("Parallel version array size\n");
    printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX);
    printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax);
    printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz);
  }

  nn= 3;
  if(id==0){
    printf(" Start rehearsal measurement process.\n");
    printf(" Measure the performance in %d times.\n\n",nn);
  }

  MPI_Barrier(MPI_COMM_WORLD);
  cpu0= MPI_Wtime();
  gosa= jacobi(nn);
  cpu1= MPI_Wtime() - cpu0;

  MPI_Allreduce(&cpu1,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  flop= fflop(mz,my,mx);
  if(id == 0){
    printf(" MFLOPS: %f time(s): %f %e\n\n",
           mflops(nn,cpu,flop),cpu,gosa);
  }
 nn= (int)(target/(cpu/3.0));

  if(id == 0){
    printf(" Now, start the actual measurement process.\n");
    printf(" The loop will be excuted in %d times\n",nn);
    printf(" This will take about one minute.\n");
    printf(" Wait for a while\n\n");
  }

  /*
   *    Start measuring
   */
  MPI_Barrier(MPI_COMM_WORLD);
  cpu0 = MPI_Wtime();
  //  nn = 10000000;
  gosa = jacobi(nn);
  cpu1 = MPI_Wtime() - cpu0;

  MPI_Allreduce(&cpu1,
                &cpu,
                1,
                MPI_DOUBLE,
                MPI_MAX,
                MPI_COMM_WORLD);

  if(id == 0){
    fprintf(stderr, "cpu : %f sec.\n", cpu);
    fprintf(stderr, "Loop executed for %d times\n",nn);
    fprintf(stderr, "Gosa : %e \n",gosa);
    fprintf(stderr, "GFLOPS measured : %f\n",mflops(nn,cpu,flop)/1000.0);
    fprintf(stderr, "Score based on Pentium III 600MHz : %f\n",
           mflops(nn,cpu,flop)/82.84);
  }

#ifdef SCR_ENABLE  
  SCR_Finalize();
#endif
  MPI_Finalize();
  
  return (0);
}
Exemple #4
0
/*
 * SCR correctness/bandwidth test with one checkpoint file per rank.
 *
 * Usage: test_correctness [filesize times sleep_secs]
 *
 * Times SCR_Init(), tries to read back and verify this rank's existing
 * checkpoint, checks that all ranks agree on the timestep, writes one
 * untimed priming checkpoint through getbw() and then, when times > 0,
 * measures bandwidth over `times` checkpoints and prints min/max/average/
 * aggregate figures from rank 0.
 *
 * Reads and writes the file-scope variables filesize, times, seconds, rank,
 * ranks and timestep, which are declared outside this function.
 *
 * Returns 0 on success and 1 on a usage error, allocation failure, corrupt
 * checkpoint contents or timestep disagreement.
 */
int main (int argc, char* argv[])
{
  /* check that we got an appropriate number of arguments */
  if (argc != 1 && argc != 4) {
    printf("Usage: test_correctness [filesize times sleep_secs]\n");
    return 1;
  }

  /* read parameters from command line, if any */
  if (argc > 1) {
    filesize = (size_t) atol(argv[1]);
    times = atoi(argv[2]);
    seconds = atoi(argv[3]);
  }

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &ranks);

  /* time how long it takes to get through init */
  MPI_Barrier(MPI_COMM_WORLD);
  double init_start = MPI_Wtime();
  SCR_Init();
  double init_end = MPI_Wtime();
  double secs = init_end - init_start;
  MPI_Barrier(MPI_COMM_WORLD);

  /* compute and print the init stats */
  double secsmin, secsmax, secssum;
  MPI_Reduce(&secs, &secsmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  MPI_Reduce(&secs, &secsmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  MPI_Reduce(&secs, &secssum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  if (rank == 0) {
    printf("Init: Min %8.6f s\tMax %8.6f s\tAvg %8.6f s\n", secsmin, secsmax, secssum/ranks);
  }

  MPI_Barrier(MPI_COMM_WORLD);

  /* allocate space for the checkpoint data (make filesize a function of rank for some variation) */
  filesize = filesize + rank;
  char* buf = (char*) malloc(filesize);
  if (buf == NULL && filesize > 0) {
    /* the size comes from the command line, so a failed allocation is a
       realistic outcome; stop the whole job instead of passing NULL on */
    printf("%d: Failed to allocate %lu bytes for checkpoint buffer\n",
           rank, (unsigned long) filesize);
    MPI_Abort(MPI_COMM_WORLD, 1);
    return 1;
  }
  
  /* get the name of our checkpoint file to open for read on restart */
  char name[256];
  char file[SCR_MAX_FILENAME];
  snprintf(name, sizeof(name), "rank_%d.ckpt", rank);
  int found_checkpoint = 0;
  if (SCR_Route_file(name, file) == SCR_SUCCESS) {
    if (read_checkpoint(file, &timestep, buf, filesize)) {
      /* read the file ok, now check that contents are good */
      found_checkpoint = 1;
      if (!check_buffer(buf, filesize, rank, timestep)) {
        printf("%d: Invalid value in buffer\n", rank);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
      }
    } else {
      printf("%d: Could not read checkpoint %d from %s\n", rank, timestep, file);
    }
  } else {
    printf("%d: SCR_Route_file failed during restart attempt\n", rank);
  }

  /* determine whether all tasks successfully read their checkpoint file */
  int all_found_checkpoint = 0;
  MPI_Allreduce(&found_checkpoint, &all_found_checkpoint, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
  if (!all_found_checkpoint && rank == 0) {
    printf("At least one rank (perhaps all) did not find its checkpoint\n");
  }

  /* check that everyone is at the same timestep: the bitwise AND and OR of
     all values are equal only when every rank holds the same value */
  int timestep_and, timestep_or;
  MPI_Allreduce(&timestep, &timestep_and, 1, MPI_INT, MPI_BAND, MPI_COMM_WORLD);
  MPI_Allreduce(&timestep, &timestep_or,  1, MPI_INT, MPI_BOR,  MPI_COMM_WORLD);
  if (timestep_and != timestep_or) {
    printf("%d: Timesteps don't agree: timestep %d\n", rank, timestep);
    return 1;
  }

  /* make up some data for the next checkpoint */
  init_buffer(buf, filesize, rank, timestep);

  timestep++;

  /* prime system once before timing */
  getbw(name, buf, filesize, 1);

  /* now compute the bandwidth and print stats */
  if (times > 0) {
    double bw = getbw(name, buf, filesize, times);

    MPI_Barrier(MPI_COMM_WORLD);

    /* compute stats and print them to the screen */
    double bwmin, bwmax, bwsum;
    MPI_Reduce(&bw, &bwmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    MPI_Reduce(&bw, &bwmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&bw, &bwsum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) {
      printf("FileIO: Min %7.2f MB/s\tMax %7.2f MB/s\tAvg %7.2f MB/s\tAgg %7.2f MB/s\n",
             bwmin, bwmax, bwsum/ranks, bwsum
      );
    }
  }

  /* free(NULL) is a no-op, so no guard is needed */
  free(buf);
  buf = NULL;

  SCR_Finalize();
  MPI_Finalize();

  return 0;
}