int main(int argc, char *argv[]) { json_object *main_obj = json_object_new_object(); json_object *parallel_obj = json_object_new_object(); json_object *problem_obj = 0; json_object *clargs_obj = 0; MACSIO_TIMING_GroupMask_t main_grp; MACSIO_TIMING_TimerId_t main_tid; int i, argi, exercise_scr = 0; int size = 1, rank = 0; /* quick pre-scan for scr cl flag */ for (i = 0; i < argc && !exercise_scr; i++) exercise_scr = !strcmp("exercise_scr", argv[i]); #warning SHOULD WE BE USING MPI-3 API #ifdef HAVE_MPI MPI_Init(&argc, &argv); #ifdef HAVE_SCR #warning SANITY CHECK WITH MIFFPP if (exercise_scr) SCR_Init(); #endif MPI_Comm_dup(MPI_COMM_WORLD, &MACSIO_MAIN_Comm); MPI_Errhandler_set(MACSIO_MAIN_Comm, MPI_ERRORS_RETURN); MPI_Comm_size(MACSIO_MAIN_Comm, &MACSIO_MAIN_Size); MPI_Comm_rank(MACSIO_MAIN_Comm, &MACSIO_MAIN_Rank); mpi_errno = MPI_SUCCESS; #endif errno = 0; main_grp = MACSIO_TIMING_GroupMask("MACSIO main()"); main_tid = MT_StartTimer("main", main_grp, MACSIO_TIMING_ITER_AUTO); MACSIO_LOG_StdErr = MACSIO_LOG_LogInit(MACSIO_MAIN_Comm, 0, 0, 0, 0); /* Process the command line and put the results in the problem */ clargs_obj = ProcessCommandLine(argc, argv, &argi); json_object_object_add(main_obj, "clargs", clargs_obj); strncpy(MACSIO_UTILS_UnitsPrefixSystem, JsonGetStr(clargs_obj, "units_prefix_system"), sizeof(MACSIO_UTILS_UnitsPrefixSystem)); MACSIO_LOG_MainLog = MACSIO_LOG_LogInit(MACSIO_MAIN_Comm, JsonGetStr(clargs_obj, "log_file_name"), JsonGetInt(clargs_obj, "log_line_length"), JsonGetInt(clargs_obj, "log_line_cnt/0"), JsonGetInt(clargs_obj, "log_line_cnt/1")); #warning THESE INITIALIZATIONS SHOULD BE IN MACSIO_LOG MACSIO_LOG_DebugLevel = JsonGetInt(clargs_obj, "debug_level"); /* Setup parallel information */ json_object_object_add(parallel_obj, "mpi_size", json_object_new_int(MACSIO_MAIN_Size)); json_object_object_add(parallel_obj, "mpi_rank", json_object_new_int(MACSIO_MAIN_Rank)); json_object_object_add(main_obj, "parallel", parallel_obj); #warning SHOULD WE INCLUDE TOP-LEVEL INFO ON VAR NAMES AND WHETHER THEYRE RESTRICTED #warning CREATE AN IO CONTEXT OBJECT /* Acquire an I/O context handle from the plugin */ /* Do a read or write test */ if (strcmp(JsonGetStr(clargs_obj, "read_path"),"null")) main_read(argi, argc, argv, main_obj); else main_write(argi, argc, argv, main_obj); /* stop total timer */ MT_StopTimer(main_tid); /* Write timings data file if requested */ if (strlen(JsonGetStr(clargs_obj, "timings_file_name"))) write_timings_file(JsonGetStr(clargs_obj, "timings_file_name")); MACSIO_TIMING_ClearTimers(MACSIO_TIMING_ALL_GROUPS); #warning ATEXIT THESE if (json_object_put(main_obj) != 1) { MACSIO_LOG_MSG(Info, ("Unable to free main JSON object")); } MACSIO_TIMING_GroupMask(0); MACSIO_TIMING_ReduceTimers(MACSIO_MAIN_Comm, -1); json_object_apath_get_string(0,0); /* free circ cache */ MACSIO_LOG_LogFinalize(MACSIO_LOG_MainLog); MACSIO_LOG_LogFinalize(MACSIO_LOG_StdErr); #ifdef HAVE_SCR if (exercise_scr) SCR_Finalize(); #endif #ifdef HAVE_MPI { int result; if ((MPI_Initialized(&result) == MPI_SUCCESS) && result) MPI_Finalize(); } #endif #warning FIX RETVAL OF MAIN TO BE NON-ZERO WHEN ERRORS OCCUR return (0); }
int main (int argc, char* argv[]) { char *path_to_stdout = NULL; int scr_retval; /* check that we got an appropriate number of arguments */ if (argc == 2) { path_to_stdout = argv[1]; } else if(argc == 5){ filesize = (size_t) atol(argv[1]); times = atoi(argv[2]); seconds = atoi(argv[3]); path_to_stdout = argv[4]; } else{ printf("Usage: test_api_file [filesize times sleep_secs path_to_stdout]\n"); printf("OR: test_api_file [ path_to_stdout]\n"); exit(1); } MPI_Init(&argc, &argv); int rank = -1, size = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); /* open file for stdout */ printf("new stdout filename: \"%s\"\n", path_to_stdout); fflush(stdout); freopen(path_to_stdout, "a+", stdout); MPI_Barrier(MPI_COMM_WORLD); /* time how long it takes to get through init */ MPI_Barrier(MPI_COMM_WORLD); double init_start = MPI_Wtime(); if (SCR_Init() != SCR_SUCCESS){ printf("FAILED INITIALIZING SCR\n"); fclose(stdout); return -1; } double init_end = MPI_Wtime(); double secs = init_end - init_start; MPI_Barrier(MPI_COMM_WORLD); double secsmin, secsmax, secssum; MPI_Reduce(&secs, &secsmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&secs, &secsmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&secs, &secssum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0) { printf("Init: Min %8.6f s\tMax %8.6f s\tAvg %8.6f s\n", secsmin, secsmax, secssum/size); } MPI_Barrier(MPI_COMM_WORLD); int num_files = rank % 4; char** files = NULL; char** bufs = NULL; size_t* filesizes = NULL; char* buf = NULL; if (num_files > 0) { files = (char**) malloc(num_files * sizeof(char*)); bufs = (char**) malloc(num_files * sizeof(char*)); filesizes = (size_t*) malloc(num_files * sizeof(size_t)); } int i; for (i=0; i < num_files; i++) { // route our checkpoint file char name[256]; sprintf(name, "rank_%d.%d.ckpt", rank, i); files[i] = strdup(name); filesizes[i] = filesize + rank + 2*i; bufs[i] = (char*) malloc(filesizes[i]); } if (num_files > 0) { buf = (char*) malloc(filesizes[num_files-1]); } // check each of our checkpoint files int found_checkpoint = 1; for (i=0; i < num_files; i++) { char file[2094]; scr_retval = SCR_Route_file(files[i], file); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Route_file(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } if (read_checkpoint(file, ×tep, buf, filesizes[i])) { // check that contents are good if (!check_buffer(buf, filesizes[i], rank + 2*i, timestep)) { printf("!!!!CORRUPTION!!!! Rank %d, File %s: Invalid value in buffer\n", rank, file); fflush(stdout); fclose(stdout); MPI_Abort(MPI_COMM_WORLD, 1); return 1; } } else { found_checkpoint = 0; } } // check that everyone found their checkpoint files ok int all_found_checkpoint = 0; MPI_Allreduce(&found_checkpoint, &all_found_checkpoint, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD); if (!all_found_checkpoint && rank == 0) { printf("At least one rank (perhaps all) did not find its checkpoint\n"); fflush(stdout); } // check that everyone is at the same timestep int timestep_and, timestep_or; int timestep_a, timestep_o; if (num_files > 0) { timestep_a = timestep; timestep_o = timestep; } else { timestep_a = 0xffffffff; timestep_o = 0x00000000; } MPI_Allreduce(×tep_a, ×tep_and, 1, MPI_INT, MPI_BAND, MPI_COMM_WORLD); MPI_Allreduce(×tep_o, ×tep_or, 1, MPI_INT, MPI_BOR, MPI_COMM_WORLD); if (timestep_and != timestep_or) { printf("%d: Timesteps don't agree: timestep %d\n", rank, timestep); fflush(stdout); fclose(stdout); return 1; } timestep = timestep_and; // make up some data for the next checkpoint for (i=0; i < num_files; i++) { init_buffer(bufs[i], filesizes[i], rank + 2*i, timestep); } timestep++; // prime system once before timing int t; for(t=0; t < 1; t++) { int rc; int all_valid = 1; scr_retval = SCR_Start_checkpoint(); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Start_checkpoint(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } for (i=0; i < num_files; i++) { int valid = 0; char file[2094]; scr_retval = SCR_Route_file(files[i], file); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_route_file(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); if (fd_me > 0) { valid = 1; // write the checkpoint rc = write_checkpoint(fd_me, timestep, bufs[i], filesizes[i]); if (rc < 0) { valid = 0; } rc = fsync(fd_me); if (rc < 0) { valid = 0; } // make sure the close is without error rc = close(fd_me); if (rc < 0) { valid = 0; } } if (!valid) { all_valid = 0; } } scr_retval = SCR_Complete_checkpoint(all_valid); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Complete_checkpoint(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } if (rank == 0) { printf("Completed checkpoint %d.\n", timestep); fflush(stdout); } timestep++; } MPI_Barrier(MPI_COMM_WORLD); if (times > 0) { int count = 0; double time_start = MPI_Wtime(); for(t=0; t < times; t++) { int rc; int all_valid = 1; scr_retval = SCR_Start_checkpoint(); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Start_checkpoint(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } for (i=0; i < num_files; i++) { int valid = 0; char file[2094]; scr_retval = SCR_Route_file(files[i], file); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Route_file(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); if (fd_me > 0) { count++; valid = 1; // write the checkpoint rc = write_checkpoint(fd_me, timestep, bufs[i], filesizes[i]); if (rc < 0) { valid = 0; } rc = fsync(fd_me); if (rc < 0) { valid = 0; } // make sure the close is without error rc = close(fd_me); if (rc < 0) { valid = 0; } } if (!valid) { all_valid = 0; } } scr_retval = SCR_Complete_checkpoint(all_valid); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Complete_checkpoint(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } if (rank == 0) { printf("Completed checkpoint %d.\n", timestep); fflush(stdout); } timestep++; if (seconds > 0) { if (rank == 0) { printf("Sleeping for %d seconds... \n", seconds); fflush(stdout); } sleep(seconds); } } double time_end = MPI_Wtime(); double bw = (filesize*count/(1024*1024)) / (time_end - time_start); MPI_Barrier(MPI_COMM_WORLD); double bwmin, bwmax, bwsum; MPI_Reduce(&bw, &bwmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&bw, &bwmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&bw, &bwsum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0) { printf("FileIO: Min %7.2f MB/s\tMax %7.2f MB/s\tAvg %7.2f MB/s\n", bwmin, bwmax, bwsum/size); } } if (buf != NULL) { free(buf); buf = NULL; } for (i=0; i < num_files; i++) { if (bufs[i] != NULL) { free(bufs[i]); bufs[i] = NULL; } if (files[i] != NULL) { free(files[i]); files[i] = NULL; } } if (files != NULL) { free(files); files = NULL; } if (bufs != NULL) { free(bufs); bufs = NULL; } if (filesizes != NULL) { free(filesizes); filesizes = NULL; } scr_retval = SCR_Finalize(); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Finalize(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } MPI_Finalize(); fclose(stdout); return 0; }
int main(int argc,char *argv[]) { int i,j,k,nn; int mx,my,mz,it; float gosa; double cpu,cpu0,cpu1,flop,target; target= 60.0; omega= 0.8; mx= MX0-1; my= MY0-1; mz= MZ0-1; ndx= NDX0; ndy= NDY0; ndz= NDZ0; MPI_Init(&argc, &argv); #ifdef SCR_ENABLE SCR_Init(); #endif MPI_Comm_size(MPI_COMM_WORLD, &npe); MPI_Comm_rank(MPI_COMM_WORLD, &id); initcomm(ndx,ndy,ndz); it= initmax(mx,my,mz); /* * Initializing matrixes */ initmt(mx,it); if(id==0){ printf("Sequential version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MX0,MY0,MZ0); printf("Parallel version array size\n"); printf(" mimax = %d mjmax = %d mkmax = %d\n",MIMAX,MJMAX,MKMAX); printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax); printf("I-decomp = %d J-decomp = %d K-decomp =%d\n",ndx,ndy,ndz); } nn= 3; if(id==0){ printf(" Start rehearsal measurement process.\n"); printf(" Measure the performance in %d times.\n\n",nn); } MPI_Barrier(MPI_COMM_WORLD); cpu0= MPI_Wtime(); gosa= jacobi(nn); cpu1= MPI_Wtime() - cpu0; MPI_Allreduce(&cpu1, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); flop= fflop(mz,my,mx); if(id == 0){ printf(" MFLOPS: %f time(s): %f %e\n\n", mflops(nn,cpu,flop),cpu,gosa); } nn= (int)(target/(cpu/3.0)); if(id == 0){ printf(" Now, start the actual measurement process.\n"); printf(" The loop will be excuted in %d times\n",nn); printf(" This will take about one minute.\n"); printf(" Wait for a while\n\n"); } /* * Start measuring */ MPI_Barrier(MPI_COMM_WORLD); cpu0 = MPI_Wtime(); // nn = 10000000; gosa = jacobi(nn); cpu1 = MPI_Wtime() - cpu0; MPI_Allreduce(&cpu1, &cpu, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); if(id == 0){ fprintf(stderr, "cpu : %f sec.\n", cpu); fprintf(stderr, "Loop executed for %d times\n",nn); fprintf(stderr, "Gosa : %e \n",gosa); fprintf(stderr, "GFLOPS measured : %f\n",mflops(nn,cpu,flop)/1000.0); fprintf(stderr, "Score based on Pentium III 600MHz : %f\n", mflops(nn,cpu,flop)/82.84); } #ifdef SCR_ENABLE SCR_Finalize(); #endif MPI_Finalize(); return (0); }
int main (int argc, char* argv[]) { /* check that we got an appropriate number of arguments */ if (argc != 1 && argc != 4) { printf("Usage: test_correctness [filesize times sleep_secs]\n"); return 1; } /* read parameters from command line, if any */ if (argc > 1) { filesize = (size_t) atol(argv[1]); times = atoi(argv[2]); seconds = atoi(argv[3]); } MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* time how long it takes to get through init */ MPI_Barrier(MPI_COMM_WORLD); double init_start = MPI_Wtime(); SCR_Init(); double init_end = MPI_Wtime(); double secs = init_end - init_start; MPI_Barrier(MPI_COMM_WORLD); /* compute and print the init stats */ double secsmin, secsmax, secssum; MPI_Reduce(&secs, &secsmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&secs, &secsmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&secs, &secssum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0) { printf("Init: Min %8.6f s\tMax %8.6f s\tAvg %8.6f s\n", secsmin, secsmax, secssum/ranks); } MPI_Barrier(MPI_COMM_WORLD); /* allocate space for the checkpoint data (make filesize a function of rank for some variation) */ filesize = filesize + rank; char* buf = (char*) malloc(filesize); /* get the name of our checkpoint file to open for read on restart */ char name[256]; char file[SCR_MAX_FILENAME]; sprintf(name, "rank_%d.ckpt", rank); int found_checkpoint = 0; if (SCR_Route_file(name, file) == SCR_SUCCESS) { if (read_checkpoint(file, ×tep, buf, filesize)) { /* read the file ok, now check that contents are good */ found_checkpoint = 1; //printf("%d: Successfully read checkpoint from %s\n", rank, file); if (!check_buffer(buf, filesize, rank, timestep)) { printf("%d: Invalid value in buffer\n", rank); MPI_Abort(MPI_COMM_WORLD, 1); return 1; } } else { printf("%d: Could not read checkpoint %d from %s\n", rank, timestep, file); } } else printf("%d: SCR_Route_file failed during restart attempt\n", rank); /* determine whether all tasks successfully read their checkpoint file */ int all_found_checkpoint = 0; MPI_Allreduce(&found_checkpoint, &all_found_checkpoint, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD); if (!all_found_checkpoint && rank == 0) { printf("At least one rank (perhaps all) did not find its checkpoint\n"); } /* check that everyone is at the same timestep */ int timestep_and, timestep_or; MPI_Allreduce(×tep, ×tep_and, 1, MPI_INT, MPI_BAND, MPI_COMM_WORLD); MPI_Allreduce(×tep, ×tep_or, 1, MPI_INT, MPI_BOR, MPI_COMM_WORLD); if (timestep_and != timestep_or) { printf("%d: Timesteps don't agree: timestep %d\n", rank, timestep); return 1; } /* make up some data for the next checkpoint */ init_buffer(buf, filesize, rank, timestep); timestep++; /* prime system once before timing */ getbw(name, buf, filesize, 1); /* now compute the bandwidth and print stats */ if (times > 0) { double bw = getbw(name, buf, filesize, times); MPI_Barrier(MPI_COMM_WORLD); /* compute stats and print them to the screen */ double bwmin, bwmax, bwsum; MPI_Reduce(&bw, &bwmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&bw, &bwmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&bw, &bwsum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0) { printf("FileIO: Min %7.2f MB/s\tMax %7.2f MB/s\tAvg %7.2f MB/s\tAgg %7.2f MB/s\n", bwmin, bwmax, bwsum/ranks, bwsum ); } } if (buf != NULL) { free(buf); buf = NULL; } SCR_Finalize(); MPI_Finalize(); return 0; }