static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); #ifdef CONFIG_F2FS_STAT_FS if (sbi->s_proc) { f2fs_stat_exit(sbi); remove_proc_entry(sb->s_id, f2fs_proc_root); } #endif stop_gc_thread(sbi); write_checkpoint(sbi, false, true); iput(sbi->node_inode); iput(sbi->meta_inode); /* destroy f2fs internal modules */ destroy_gc_manager(sbi); destroy_node_manager(sbi); destroy_segment_manager(sbi); kfree(sbi->ckpt); sb->s_fs_info = NULL; brelse(sbi->raw_super_buf); kfree(sbi); }
/*
 * Save the current state for position p.
 *
 * A checkpoint is written when checkpoint_opt is set. The output files are
 * written as well, except when both checkpoint_opt and factors_opt are set.
 */
void write_save_file(uint64_t p)
{
  if (checkpoint_opt)
    write_checkpoint(p);

  /* Both options set: the checkpoint alone is written, nothing more to do. */
  if (checkpoint_opt && factors_opt)
    return;

  write_output_files(p);
}
static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); if (sbi->s_proc) { remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry(sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); f2fs_destroy_stats(sbi); stop_gc_thread(sbi); /* We don't need to do checkpoint when it's clean */ if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES)) write_checkpoint(sbi, true); iput(sbi->node_inode); iput(sbi->meta_inode); /* destroy f2fs internal modules */ destroy_node_manager(sbi); destroy_segment_manager(sbi); kfree(sbi->ckpt); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); sb->s_fs_info = NULL; brelse(sbi->raw_super_buf); kfree(sbi); }
static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); if (sbi->s_proc) { remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry(sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); stop_gc_thread(sbi); /* prevent remaining shrinker jobs */ mutex_lock(&sbi->umount_mutex); /* * We don't need to do checkpoint when superblock is clean. * But, the previous checkpoint was not done by umount, it needs to do * clean checkpoint again. */ if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) { struct cp_control cpc = { .reason = CP_UMOUNT, }; write_checkpoint(sbi, &cpc); } /* write_checkpoint can update stat informaion */ f2fs_destroy_stats(sbi); /* * normally superblock is clean, so we need to release this. * In addition, EIO will skip do checkpoint, we need this as well. */ release_dirty_inode(sbi); release_discard_addrs(sbi); f2fs_leave_shrinker(sbi); mutex_unlock(&sbi->umount_mutex); iput(sbi->node_inode); iput(sbi->meta_inode); /* destroy f2fs internal modules */ destroy_node_manager(sbi); destroy_segment_manager(sbi); kfree(sbi->ckpt); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); sb->s_fs_info = NULL; brelse(sbi->raw_super_buf); kfree(sbi); }
// MEMBER FUNCTION void Trick::MemoryManager::write_checkpoint( std::ostream& out_s, const char* var_name) { std::vector<ALLOC_INFO*> dependencies; pthread_mutex_lock(&mm_mutex); get_alloc_deps_in_allocation( dependencies, var_name); pthread_mutex_unlock(&mm_mutex); write_checkpoint( out_s, dependencies); }
// MEMBER FUNCTION void Trick::MemoryManager::write_checkpoint(const char* filename) { std::ofstream outfile( filename, std::ios::out); if (outfile.is_open()) { write_checkpoint( outfile); } else { message_publish(MSG_ERROR, "Memory Manager ERROR: Couldn't open \"%s\".\n", filename) ; } }
// MEMBER FUNCTION void Trick::MemoryManager::write_checkpoint(const char* filename, std::vector<const char*>& var_name_list) { std::ofstream out_s( filename, std::ios::out); if (out_s.is_open()) { write_checkpoint( out_s, var_name_list); } else { std::cerr << "ERROR: Couldn't open \""<< filename <<"\"." << std::endl; std::cerr.flush(); } }
/*
 * f2fs_sync_fs - write a checkpoint on a synchronous sync request.
 * @sb:   super block to sync.
 * @sync: non-zero when a checkpoint should be written.
 *
 * Nothing is done when s_dirty is clear and get_pages() reports no
 * F2FS_DIRTY_NODES pages. Always returns 0.
 */
int f2fs_sync_fs(struct super_block *sb, int sync)
{
	struct f2fs_sb_info *fsi = F2FS_SB(sb);

	/* Clean superblock and no dirty node pages: nothing to write. */
	if (!(fsi->s_dirty || get_pages(fsi, F2FS_DIRTY_NODES)))
		return 0;

	if (sync)
		write_checkpoint(fsi, false, false);

	return 0;
}
int f2fs_sync_fs(struct super_block *sb, int sync) { struct f2fs_sb_info *sbi = F2FS_SB(sb); trace_f2fs_sync_fs(sb, sync); if (sync) { struct cp_control cpc = { .reason = CP_SYNC, }; mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); } else {
// MEMBER FUNCTION void Trick::MemoryManager::write_checkpoint( std::ostream& out_s, std::vector<const char*>& var_name_list) { std::vector<ALLOC_INFO*> dependencies; const char* var_name; int n_names; n_names = var_name_list.size(); for (int ii=0; ii< n_names; ii++) { var_name = var_name_list[ii]; pthread_mutex_lock(&mm_mutex); get_alloc_deps_in_allocation(dependencies, var_name); pthread_mutex_unlock(&mm_mutex); } write_checkpoint( out_s, dependencies); }
/*
 * f2fs_sync_fs - sync entry point for f2fs.
 * @sb:   super block to sync.
 * @sync: non-zero to write a checkpoint (under gc_mutex); zero to call
 *        f2fs_balance_fs() instead.
 *
 * Always returns 0.
 */
int f2fs_sync_fs(struct super_block *sb, int sync)
{
	struct f2fs_sb_info *fsi = F2FS_SB(sb);

	trace_f2fs_sync_fs(sb, sync);

	if (!sync) {
		f2fs_balance_fs(fsi);
		return 0;
	}

	/* The checkpoint is written with gc_mutex held. */
	mutex_lock(&fsi->gc_mutex);
	write_checkpoint(fsi, false);
	mutex_unlock(&fsi->gc_mutex);

	return 0;
}
// MEMBER FUNCTION void Trick::MemoryManager::write_checkpoint( std::ostream& out_s) { ALLOC_INFO_MAP::iterator pos; ALLOC_INFO* alloc_info; std::vector<ALLOC_INFO*> dependencies; pthread_mutex_lock(&mm_mutex); for ( pos=alloc_info_map.begin() ; pos!=alloc_info_map.end() ; pos++ ) { alloc_info = pos->second; dependencies.push_back(alloc_info); } // Sort the dependencies by ALLOC_INFO.id. std::sort( dependencies.begin() , dependencies.end() , alloc_info_id_compare) ; pthread_mutex_unlock(&mm_mutex); write_checkpoint( out_s, dependencies); }
/*
 * f2fs_sync_fs - sync entry point for f2fs.
 * @sb:   super block to sync.
 * @sync: non-zero to write a checkpoint (under gc_mutex); zero to call
 *        f2fs_balance_fs() instead.
 *
 * Nothing is done (beyond the trace call) when s_dirty is clear and
 * get_pages() reports no F2FS_DIRTY_NODES pages. Always returns 0.
 */
int f2fs_sync_fs(struct super_block *sb, int sync)
{
	struct f2fs_sb_info *fsi = F2FS_SB(sb);

	trace_f2fs_sync_fs(sb, sync);

	/* Clean superblock and no dirty node pages: nothing to write. */
	if (!(fsi->s_dirty || get_pages(fsi, F2FS_DIRTY_NODES)))
		return 0;

	if (!sync) {
		f2fs_balance_fs(fsi);
		return 0;
	}

	/* The checkpoint is written with gc_mutex held. */
	mutex_lock(&fsi->gc_mutex);
	write_checkpoint(fsi, false);
	mutex_unlock(&fsi->gc_mutex);

	return 0;
}
static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); if (sbi->s_proc) { remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry(sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); f2fs_destroy_stats(sbi); stop_gc_thread(sbi); /* We don't need to do checkpoint when it's clean */ if (sbi->s_dirty) { struct cp_control cpc = { .reason = CP_UMOUNT, }; write_checkpoint(sbi, &cpc); } /* * normally superblock is clean, so we need to release this. * In addition, EIO will skip do checkpoint, we need this as well. */ release_dirty_inode(sbi); release_discard_addrs(sbi); iput(sbi->node_inode); iput(sbi->meta_inode); /* destroy f2fs internal modules */ destroy_node_manager(sbi); destroy_segment_manager(sbi); kfree(sbi->ckpt); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); sb->s_fs_info = NULL; brelse(sbi->raw_super_buf); kfree(sbi); }
/*
 * f2fs_ioc_write_checkpoint - ioctl handler that writes a checkpoint.
 * @filp: file the ioctl was issued on; its inode selects the filesystem.
 * @arg:  not used by this handler.
 *
 * Returns -EPERM when the caller lacks CAP_SYS_ADMIN, -EROFS when
 * f2fs_readonly() reports a read-only filesystem, and 0 otherwise.
 */
static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
{
	struct inode *ino = file_inode(filp);
	struct f2fs_sb_info *fsi = F2FS_I_SB(ino);
	struct cp_control cpc;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (f2fs_readonly(fsi->sb))
		return -EROFS;

	cpc.reason = __get_cp_reason(fsi);

	/* The checkpoint is written with gc_mutex held. */
	mutex_lock(&fsi->gc_mutex);
	write_checkpoint(fsi, &cpc);
	mutex_unlock(&fsi->gc_mutex);

	return 0;
}
/*
 * f2fs_sync_fs - sync entry point for f2fs.
 * @sb:   super block to sync.
 * @sync: non-zero to write a checkpoint under gc_mutex.
 *
 * Returns the value returned by write_checkpoint() when @sync is non-zero,
 * and 0 otherwise. f2fs_trace_ios(NULL, 1) is called in both cases.
 */
int f2fs_sync_fs(struct super_block *sb, int sync)
{
	struct f2fs_sb_info *fsi = F2FS_SB(sb);
	int ret = 0;

	trace_f2fs_sync_fs(sb, sync);

	if (sync) {
		struct cp_control cpc;

		cpc.reason = __get_cp_reason(fsi);

		/* The checkpoint is written with gc_mutex held. */
		mutex_lock(&fsi->gc_mutex);
		ret = write_checkpoint(fsi, &cpc);
		mutex_unlock(&fsi->gc_mutex);
	}

	f2fs_trace_ios(NULL, 1);

	return ret;
}
static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); f2fs_destroy_stats(sbi); stop_gc_thread(sbi); write_checkpoint(sbi, true); iput(sbi->node_inode); iput(sbi->meta_inode); /* destroy f2fs internal modules */ destroy_node_manager(sbi); destroy_segment_manager(sbi); kfree(sbi->ckpt); sb->s_fs_info = NULL; brelse(sbi->raw_super_buf); kfree(sbi); }
void MaxPosterior::DE(IM im, popTree* poptree, Chain coldCh, unsigned int nProcs, unsigned int crr_procID) { /* if(crr_procID == 0) std::cout << "In MaxPosterior::DE()\n"; */ unsigned int conv = 0; unsigned int iter=0; while(conv == 0 && iter < nIters) { //if(crr_procID==0) //std::cout << "Starting DE_eachIter\n"; DE_eachIter(im, poptree, coldCh, nProcs, crr_procID); //if(crr_procID==0) //std::cout << "Ending DE_eachIter\n"; // YC 1/12/2015 // There are two criteria used to determine the convergence // the first criterion is same as that in IMa2 // and the 2nd criterion is added. It is very difficult // to satisfy the 1st criterion if the maximum posterior value is very large, // but the 2nd criterion would determine the convergence earlier. maxDist = 0; if(logPosteriorMax-logPosteriorMin < pow(10,-4) && abs((logPosteriorMax-logPosteriorMin)/logPosteriorMax) < pow(10,-4)) { /* if(crr_procID ==0) { // std::vector<long double>::const_iterator iter_max; // iter_max = max_element(posterior_atCrr.begin(), posterior_atCrr.end()); Eigen::MatrixXd maxPara = para_atCrr.row(ID_max); for(unsigned int ii=0; ii<nParaVectors; ii++) { if(ii!=ID_max) { double dd = max((para_atCrr.row(ii)-maxPara).maxCoeff(),(maxPara-para_atCrr.row(ii)).maxCoeff() ); if(maxDist <dd) maxDist = dd; } } } MPI::COMM_WORLD.Barrier(); MPI::COMM_WORLD.Bcast(&maxDist, 1, MPI_DOUBLE, 0); MPI::COMM_WORLD.Barrier(); if(maxDist < pow(10,-4)) { conv = 1; } */ conv =1; // std::cout <<"iter= "<<iter <<" logPosteriorMax-logPosteriorMin="<<logPosteriorMax-logPosteriorMin <<" (logPosteriorMax-logPosteriorMin)/logPosteriorMax="<<(logPosteriorMax-logPosteriorMin)/logPosteriorMax <<"\n"; } if(crr_procID == 0) { if(iter - 100* static_cast<unsigned int>(iter/100) == 0) { /* unsigned int found_min = 0; unsigned int count_min = 0; while( count_min < nParaVectors && found_min==0) { if(logPosteriorMin==posterior_atCrr.at(count_min)) found_min = 1; else count_min++; } */ std::cout << "\n\niter = " << iter <<": the largest 
log(posterior) = "<< logPosteriorMax << ", the smallest log(posterior) = " << logPosteriorMin <<"\n"; /* if(maxDist !=0) std::cout << "maxDist = " << maxDist <<"\n"; */ std::cout << "Estimates with the largest posterior: " << para_atCrr.row(ID_max) <<"\n"; // std::cout << "Miminum a posterior estimates: " << para_atCrr.row(count_min) <<"\n"; /* std::cout << "The total computing time for getting eigen values and eigen vectors: " << totalComputingTime_eigen.count()/1000000 <<"(sec)\n"; std::cout << "The function was called " << totalNum_eigenFunctionCalls << " times\n"; std::cout << "The total computing time for computing the coalescent conditional probabilities: " << totalComputingTime_condiProb.count()/1000000 <<"(sec)\n"; std::cout << "The function was called " << totalNum_condiProbFunctionCalls << " times\n"; */ } else if(iter - 10* static_cast<unsigned int>(iter/10) == 0) { std::cout <<"."; } } // write a checkpoint if(crr_procID ==0) { if(checkpoint == 1|| checkpoint==3) { if(iter - howOften_checkpoint * static_cast<unsigned int>(iter/howOften_checkpoint) ==0) { write_checkpoint(); } } } iter++; } if(conv==0 && crr_procID==0) { std::cout << "\nWarning: the optimization did not converge.\n"; } if(crr_procID == 0) { std::cout << "\nMaximum a posterior estimates: " << para_atCrr.row(ID_max) << "\nlog(posterior density) = " << logPosteriorMax <<" \n at iteration " << iter <<"\n"; /* std::cout << "Computing the posterior means...\n"; marginals.computeMeanIntervals_forDE(); std::cout << "Done.\n"; std::cout << "Saving the posterior means..\n"; marginals.saveMeanIntervals(); std::cout << "Done.\n"; //std::cout << "Saving the histograms..\n"; marginals.saveHistogram_forDE(); //std::cout << "Done\n"; */ } return; }
void mdoutf_write_to_trajectory_files(FILE *fplog, t_commrec *cr, gmx_mdoutf_t of, int mdof_flags, gmx_mtop_t *top_global, gmx_int64_t step, double t, t_state *state_local, t_state *state_global, rvec *f_local, rvec *f_global) { rvec *local_v; rvec *global_v; /* MRS -- defining these variables is to manage the difference * between half step and full step velocities, but there must be a better way . . . */ local_v = state_local->v; global_v = state_global->v; if (DOMAINDECOMP(cr)) { if (mdof_flags & MDOF_CPT) { dd_collect_state(cr->dd, state_local, state_global); } else { if (mdof_flags & (MDOF_X | MDOF_X_COMPRESSED)) { dd_collect_vec(cr->dd, state_local, state_local->x, state_global->x); } if (mdof_flags & MDOF_V) { dd_collect_vec(cr->dd, state_local, local_v, global_v); } } if (mdof_flags & MDOF_F) { dd_collect_vec(cr->dd, state_local, f_local, f_global); } } else { if (mdof_flags & MDOF_CPT) { /* All pointers in state_local are equal to state_global, * but we need to copy the non-pointer entries. */ state_global->lambda = state_local->lambda; state_global->veta = state_local->veta; state_global->vol0 = state_local->vol0; copy_mat(state_local->box, state_global->box); copy_mat(state_local->boxv, state_global->boxv); copy_mat(state_local->svir_prev, state_global->svir_prev); copy_mat(state_local->fvir_prev, state_global->fvir_prev); copy_mat(state_local->pres_prev, state_global->pres_prev); } } if (MASTER(cr)) { if (mdof_flags & MDOF_CPT) { fflush_tng(of->tng); fflush_tng(of->tng_low_prec); write_checkpoint(of->fn_cpt, of->bKeepAndNumCPT, fplog, cr, of->eIntegrator, of->simulation_part, of->bExpanded, of->elamstats, step, t, state_global); } if (mdof_flags & (MDOF_X | MDOF_V | MDOF_F)) { if (of->fp_trn) { gmx_trr_write_frame(of->fp_trn, step, t, state_local->lambda[efptFEP], state_local->box, top_global->natoms, (mdof_flags & MDOF_X) ? state_global->x : NULL, (mdof_flags & MDOF_V) ? global_v : NULL, (mdof_flags & MDOF_F) ? 
f_global : NULL); if (gmx_fio_flush(of->fp_trn) != 0) { gmx_file("Cannot write trajectory; maybe you are out of disk space?"); } } gmx_fwrite_tng(of->tng, FALSE, step, t, state_local->lambda[efptFEP], state_local->box, top_global->natoms, (mdof_flags & MDOF_X) ? state_global->x : NULL, (mdof_flags & MDOF_V) ? global_v : NULL, (mdof_flags & MDOF_F) ? f_global : NULL); } if (mdof_flags & MDOF_X_COMPRESSED) { rvec *xxtc = NULL; if (of->natoms_x_compressed == of->natoms_global) { /* We are writing the positions of all of the atoms to the compressed output */ xxtc = state_global->x; } else { /* We are writing the positions of only a subset of the atoms to the compressed output, so we have to make a copy of the subset of coordinates. */ int i, j; snew(xxtc, of->natoms_x_compressed); for (i = 0, j = 0; (i < of->natoms_global); i++) { if (ggrpnr(of->groups, egcCompressedX, i) == 0) { copy_rvec(state_global->x[i], xxtc[j++]); } } } if (write_xtc(of->fp_xtc, of->natoms_x_compressed, step, t, state_local->box, xxtc, of->x_compression_precision) == 0) { gmx_fatal(FARGS, "XTC error - maybe you are out of disk space?"); } gmx_fwrite_tng(of->tng_low_prec, TRUE, step, t, state_local->lambda[efptFEP], state_local->box, of->natoms_x_compressed, xxtc, NULL, NULL); if (of->natoms_x_compressed != of->natoms_global) { sfree(xxtc); } } } }
/* This function is called (via check_events()) from the top level sieve loops (prime_sieve() etc.). It can assume that it is safe to tighten any sieving parameters other than p_min and p_max. */ void process_events(uint64_t current_prime) { /* event_happened was set last in notify_event(), so clear it first which ensures that if some signal arrives while we are in process_events() it might have to wait until the next sieve iteration to get processed, but it won't be lost. */ event_happened = 0; if (clear_event(initialise_events)) { init_signals(); init_progress_report(current_prime); } if (clear_event(sieve_parameters_changed)) init_progress_report(current_prime); if (clear_event(received_sigterm)) { finish_srsieve("SIGTERM was received",current_prime); signal(SIGTERM,SIG_DFL); raise(SIGTERM); } if (clear_event(received_sigint)) { finish_srsieve("SIGINT was received",current_prime); signal(SIGINT,SIG_DFL); raise(SIGINT); } #ifdef SIGHUP if (clear_event(received_sighup)) { finish_srsieve("SIGHUP was received",current_prime); signal(SIGHUP,SIG_DFL); raise(SIGHUP); } #endif #if HAVE_FORK if (clear_event(received_sigpipe)) { finish_srsieve("SIGPIPE was received",current_prime); signal(SIGPIPE,SIG_DFL); raise(SIGPIPE); } if (clear_event(received_sigchld)) { finish_srsieve("SIGCHLD was received",current_prime); signal(SIGCHLD,SIG_DFL); raise(SIGCHLD); exit(EXIT_FAILURE); } #endif if (clear_event(factor_found)) next_report_due = time(NULL); if (clear_event(report_due)) progress_report(current_prime); if (clear_event(save_due)) { #if SOBISTRATOR_OPT if (sobistrator_opt) sob_write_checkpoint(current_prime); #endif write_checkpoint(current_prime); } }
void checkdata(char* file, size_t size, int times) { char* buf = malloc(size); MPI_Barrier(MPI_COMM_WORLD); if (times > 0) { /* write the checkpoint file */ int i, j; for(i=0; i < times; i++) { int rc; int valid = 0; rc = init_buffer(buf, size, rank, i); if (rank == 0) { printf("Writing checkpoint %d.\n", i); fflush(stdout); } /* open the file and write the checkpoint */ int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); if (fd_me > 0) { valid = 1; /* write the checkpoint data */ rc = write_checkpoint(fd_me, rank, i, buf, size); if (rc < 0) { valid = 0; } /* force the data to storage */ rc = fsync(fd_me); if (rc < 0) { valid = 0; } /* make sure the close is without error */ rc = close(fd_me); if (rc < 0) { valid = 0; } } if (rank == 0) { printf("Completed checkpoint %d.\n", i); fflush(stdout); } if (rank == 0) { printf("Reading checkpoint %d.\n", i); fflush(stdout); } memset(buf, 0, size); /* open the file and write the checkpoint */ int read_rank, read_timestep; fd_me = open(file, O_RDONLY); if (fd_me > 0) { valid = 1; /* write the checkpoint data */ rc = read_checkpoint(fd_me, &read_rank, &read_timestep, buf, size); if (rc < 0) { valid = 0; } /* make sure the close is without error */ rc = close(fd_me); if (rc < 0) { valid = 0; } } if (read_rank != rank || read_timestep != i) { printf("INVALID HEADER on rank %d in step %d\n", rank, i); fflush(stdout); MPI_Abort(MPI_COMM_WORLD, 0); } rc = check_buffer(buf, size, rank, i); if (! rc) { printf("INVALID DATA on rank %d in step %d\n", rank, i); fflush(stdout); MPI_Abort(MPI_COMM_WORLD, 0); } if (rank == 0) { printf("Verified checkpoint %d.\n", read_timestep); fflush(stdout); } /* optionally sleep for some time */ if (seconds > 0) { if (rank == 0) { printf("Sleeping for %d seconds... \n", seconds); fflush(stdout); } sleep(seconds); } unlink(file); } } MPI_Barrier(MPI_COMM_WORLD); if (buf != NULL) { free(buf); buf = NULL; } return; }
int main (int argc, char* argv[]) { char *path_to_stdout = NULL; int scr_retval; /* check that we got an appropriate number of arguments */ if (argc == 2) { path_to_stdout = argv[1]; } else if(argc == 5){ filesize = (size_t) atol(argv[1]); times = atoi(argv[2]); seconds = atoi(argv[3]); path_to_stdout = argv[4]; } else{ printf("Usage: test_api_file [filesize times sleep_secs path_to_stdout]\n"); printf("OR: test_api_file [ path_to_stdout]\n"); exit(1); } MPI_Init(&argc, &argv); int rank = -1, size = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); /* open file for stdout */ printf("new stdout filename: \"%s\"\n", path_to_stdout); fflush(stdout); freopen(path_to_stdout, "a+", stdout); MPI_Barrier(MPI_COMM_WORLD); /* time how long it takes to get through init */ MPI_Barrier(MPI_COMM_WORLD); double init_start = MPI_Wtime(); if (SCR_Init() != SCR_SUCCESS){ printf("FAILED INITIALIZING SCR\n"); fclose(stdout); return -1; } double init_end = MPI_Wtime(); double secs = init_end - init_start; MPI_Barrier(MPI_COMM_WORLD); double secsmin, secsmax, secssum; MPI_Reduce(&secs, &secsmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&secs, &secsmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&secs, &secssum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0) { printf("Init: Min %8.6f s\tMax %8.6f s\tAvg %8.6f s\n", secsmin, secsmax, secssum/size); } MPI_Barrier(MPI_COMM_WORLD); int num_files = rank % 4; char** files = NULL; char** bufs = NULL; size_t* filesizes = NULL; char* buf = NULL; if (num_files > 0) { files = (char**) malloc(num_files * sizeof(char*)); bufs = (char**) malloc(num_files * sizeof(char*)); filesizes = (size_t*) malloc(num_files * sizeof(size_t)); } int i; for (i=0; i < num_files; i++) { // route our checkpoint file char name[256]; sprintf(name, "rank_%d.%d.ckpt", rank, i); files[i] = strdup(name); filesizes[i] = filesize + rank + 2*i; bufs[i] = (char*) malloc(filesizes[i]); } if (num_files > 0) { buf = 
(char*) malloc(filesizes[num_files-1]); } // check each of our checkpoint files int found_checkpoint = 1; for (i=0; i < num_files; i++) { char file[2094]; scr_retval = SCR_Route_file(files[i], file); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Route_file(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } if (read_checkpoint(file, ×tep, buf, filesizes[i])) { // check that contents are good if (!check_buffer(buf, filesizes[i], rank + 2*i, timestep)) { printf("!!!!CORRUPTION!!!! Rank %d, File %s: Invalid value in buffer\n", rank, file); fflush(stdout); fclose(stdout); MPI_Abort(MPI_COMM_WORLD, 1); return 1; } } else { found_checkpoint = 0; } } // check that everyone found their checkpoint files ok int all_found_checkpoint = 0; MPI_Allreduce(&found_checkpoint, &all_found_checkpoint, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD); if (!all_found_checkpoint && rank == 0) { printf("At least one rank (perhaps all) did not find its checkpoint\n"); fflush(stdout); } // check that everyone is at the same timestep int timestep_and, timestep_or; int timestep_a, timestep_o; if (num_files > 0) { timestep_a = timestep; timestep_o = timestep; } else { timestep_a = 0xffffffff; timestep_o = 0x00000000; } MPI_Allreduce(×tep_a, ×tep_and, 1, MPI_INT, MPI_BAND, MPI_COMM_WORLD); MPI_Allreduce(×tep_o, ×tep_or, 1, MPI_INT, MPI_BOR, MPI_COMM_WORLD); if (timestep_and != timestep_or) { printf("%d: Timesteps don't agree: timestep %d\n", rank, timestep); fflush(stdout); fclose(stdout); return 1; } timestep = timestep_and; // make up some data for the next checkpoint for (i=0; i < num_files; i++) { init_buffer(bufs[i], filesizes[i], rank + 2*i, timestep); } timestep++; // prime system once before timing int t; for(t=0; t < 1; t++) { int rc; int all_valid = 1; scr_retval = SCR_Start_checkpoint(); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Start_checkpoint(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } for (i=0; i < num_files; i++) { int valid = 
0; char file[2094]; scr_retval = SCR_Route_file(files[i], file); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_route_file(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); if (fd_me > 0) { valid = 1; // write the checkpoint rc = write_checkpoint(fd_me, timestep, bufs[i], filesizes[i]); if (rc < 0) { valid = 0; } rc = fsync(fd_me); if (rc < 0) { valid = 0; } // make sure the close is without error rc = close(fd_me); if (rc < 0) { valid = 0; } } if (!valid) { all_valid = 0; } } scr_retval = SCR_Complete_checkpoint(all_valid); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Complete_checkpoint(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } if (rank == 0) { printf("Completed checkpoint %d.\n", timestep); fflush(stdout); } timestep++; } MPI_Barrier(MPI_COMM_WORLD); if (times > 0) { int count = 0; double time_start = MPI_Wtime(); for(t=0; t < times; t++) { int rc; int all_valid = 1; scr_retval = SCR_Start_checkpoint(); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Start_checkpoint(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } for (i=0; i < num_files; i++) { int valid = 0; char file[2094]; scr_retval = SCR_Route_file(files[i], file); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Route_file(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); if (fd_me > 0) { count++; valid = 1; // write the checkpoint rc = write_checkpoint(fd_me, timestep, bufs[i], filesizes[i]); if (rc < 0) { valid = 0; } rc = fsync(fd_me); if (rc < 0) { valid = 0; } // make sure the close is without error rc = close(fd_me); if (rc < 0) { valid = 0; } } if (!valid) { all_valid = 0; } } scr_retval = SCR_Complete_checkpoint(all_valid); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Complete_checkpoint(): %d: @%s:%d\n", rank, 
scr_retval, __FILE__, __LINE__ ); } if (rank == 0) { printf("Completed checkpoint %d.\n", timestep); fflush(stdout); } timestep++; if (seconds > 0) { if (rank == 0) { printf("Sleeping for %d seconds... \n", seconds); fflush(stdout); } sleep(seconds); } } double time_end = MPI_Wtime(); double bw = (filesize*count/(1024*1024)) / (time_end - time_start); MPI_Barrier(MPI_COMM_WORLD); double bwmin, bwmax, bwsum; MPI_Reduce(&bw, &bwmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&bw, &bwmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&bw, &bwsum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0) { printf("FileIO: Min %7.2f MB/s\tMax %7.2f MB/s\tAvg %7.2f MB/s\n", bwmin, bwmax, bwsum/size); } } if (buf != NULL) { free(buf); buf = NULL; } for (i=0; i < num_files; i++) { if (bufs[i] != NULL) { free(bufs[i]); bufs[i] = NULL; } if (files[i] != NULL) { free(files[i]); files[i] = NULL; } } if (files != NULL) { free(files); files = NULL; } if (bufs != NULL) { free(bufs); bufs = NULL; } if (filesizes != NULL) { free(filesizes); filesizes = NULL; } scr_retval = SCR_Finalize(); if (scr_retval != SCR_SUCCESS) { printf("%d: failed calling SCR_Finalize(): %d: @%s:%d\n", rank, scr_retval, __FILE__, __LINE__ ); } MPI_Finalize(); fclose(stdout); return 0; }
/*
 * run_gp - the main evolution loop.
 *
 * mpop                - the set of subpopulations to evolve.
 * startgen            - number of the first generation to run.
 * t_eval, t_breed     - accumulators for the time spent evaluating/breeding.
 * startfromcheckpoint - nonzero when resuming from a checkpoint; the run
 *                       statistics and saved-individual list are then not
 *                       (re)allocated here, and evaluation is skipped for
 *                       generation startgen.
 *
 * Reads "max_generations", "multiple.exch_gen" (only when there is more
 * than one subpopulation), "checkpoint.interval", "checkpoint.filename" and
 * "output.stt_interval" (plus "output.bestn" on a fresh run) through
 * get_parameter(), runs generations until maxgen is reached or
 * generation_information() reports termination, and frees the run
 * statistics before returning.
 */
void run_gp(multipop *mpop, int startgen, event *t_eval, event *t_breed,
            int startfromcheckpoint)
{
    char *param;
    int gen;
    int maxgen;
    int exch_gen;
    int i, j;
    int checkinterval;
    char *checkfileformat;
    char *checkfilename = NULL;
    event start, end, diff;
    int term = 0;
    int stt_interval;
    int bestn;

    termination_override = 0;

    if (!startfromcheckpoint)
    {
        /* get the number of top individuals to track. */
        bestn = atoi(get_parameter("output.bestn"));
        if (bestn < 1)
        {
            error( E_WARNING,
                   "\"output.bestn\" must be at least 1. defaulting to 1.");
            bestn = 1;
        }

        /* allocate statistics for overall run. */
        run_stats = (popstats *) MALLOC((mpop->size + 1) * sizeof(popstats));
        for (i = 0; i < mpop->size + 1; ++i)
        {
            run_stats[i].bestn = bestn;
            run_stats[i].size = -1;
        }

        /* initialize the linked list of saved individuals. */
        saved_head = (saved_ind *) MALLOC(sizeof(saved_ind));
        saved_head->ind = NULL;
        saved_head->refcount = 0;
        saved_head->next = NULL;
        saved_tail = saved_head;
    }

    /* get the maximum number of generations. */
    param = get_parameter("max_generations");
    if (param == NULL)
        error( E_FATAL_ERROR,
               "no value specified for \"max_generations\".");
    maxgen = atoi(param);
    if (maxgen <= 0)
        error( E_FATAL_ERROR,
               "\"max_generations\" must be greater than zero.");

    /* get the interval for subpopulation exchanges, if there is more than
       one subpopulation. */
    if (mpop->size > 1)
    {
        param = get_parameter("multiple.exch_gen");
        if (param == NULL)
            error( E_FATAL_ERROR,
                   "no value specified for \"multiple.exch_gen\".");
        exch_gen = atoi(param);
        if (exch_gen <= 0)
            error( E_FATAL_ERROR,
                   "\"multiple.exch_gen\" must be greater than zero.");
    }

    /* get the interval for doing checkpointing; a missing parameter
       disables checkpointing. */
    param = get_parameter("checkpoint.interval");
    if (param != NULL)
        checkinterval = atoi(param);
    else
        checkinterval = -1;

    /* get the format string for the checkpoint filenames; 50 extra bytes
       are reserved for the expanded generation number. */
    checkfileformat = get_parameter("checkpoint.filename");
    checkfilename = (char *) MALLOC(strlen(checkfileformat) + 50);

    /* get the interval for writing information to the .stt file. */
    stt_interval = atoi(get_parameter("output.stt_interval"));
    if (stt_interval < 1)
        error( E_FATAL_ERROR,
               "\"output.stt_interval\" must be greater than zero.");

    oputs( OUT_SYS, 10, "\n\nstarting evolution.\n");

    /* print out how often we'll be doing checkpointing. */
    if (checkinterval > 0)
        oprintf( OUT_SYS, 20,
                 "checkpointing will be done every %d generations and "
                 "after the last generation.\n", checkinterval);
    else if (checkinterval == 0)
        oprintf( OUT_SYS, 20,
                 "checkpointing will be done only after the last "
                 "generation.\n");
    else
        oprintf( OUT_SYS, 20, "no checkpointing will be done.\n");

    /* the big loop. */
    for (gen = startgen; gen < maxgen && !term; ++gen)
    {
        oprintf( OUT_SYS, 20, "=== generation %d.\n", gen);
        generation_No = gen;

        /* unless this is the first generation after loading a checkpoint
           file... */
        if (!(startfromcheckpoint && gen == startgen))
        {
            /* evaluate the population. */
            event_mark(&start);
            for (i = 0; i < mpop->size; ++i)
            {
                evaluate_pop(mpop->pop[i]);
            }
            event_mark(&end);
            event_diff(&diff, &start, &end);

#ifdef TIMING_AVAILABLE
            oprintf( OUT_SYS, 40, " evaluation complete. (%s)\n",
                     event_string(&diff));
#else
            oprintf ( OUT_SYS, 40, " evaluation complete.\n" );
#endif

            event_accum(t_eval, &diff);

            /* calculate and print statistics. returns 1 if user
               termination criterion was met, 0 otherwise. */
            term = generation_information(gen, mpop, stt_interval,
                                          run_stats[0].bestn);

            flush_output_streams();
        }

        /** write a checkpoint file if checkinterval is non-negative and:
            we've reached the last generation, or
            the user termination criterion has been met, or
            we've reached the specified checkpoint interval. **/
        if (checkinterval >= 0)
        {
            int checkpoint_due =
                (gen == maxgen) || term ||
                (checkinterval > 0 && gen > startgen &&
                 (gen % checkinterval) == 0);

            if (checkpoint_due)
            {
                sprintf(checkfilename, checkfileformat, gen);
                write_checkpoint(gen, mpop, checkfilename);
            }
        }

        /** if this is not the last generation and the user criterion hasn't
            been met, then do breeding. **/
        if (gen != maxgen && !term)
        {
            /** exchange subpops if it's time. **/
            if (mpop->size > 1 && gen && (gen % exch_gen) == 0)
            {
                exchange_subpopulations(mpop);
                oprintf( OUT_SYS, 10,
                         " subpopulation exchange complete.\n");
            }

            /* breed the new population. */
            event_mark(&start);
            for (i = 0; i < mpop->size; ++i)
                mpop->pop[i] = change_population(mpop->pop[i], mpop->bpt[i]);
            event_mark(&end);
            event_diff(&diff, &start, &end);

            /* call the application end-of-breeding callback. */
            app_end_of_breeding(gen, mpop);

#ifdef TIMING_AVAILABLE
            oprintf( OUT_SYS, 30, " breeding complete. (%s)\n",
                     event_string(&diff));
#else
            oprintf ( OUT_SYS, 30, " breeding complete.\n" );
#endif

            event_accum(t_breed, &diff);
        }

        /* free unused ERCs. */
        ephem_const_gc();

        flush_output_streams();
    }

    /** free up a lot of stuff before returning. */

    if (checkfilename)
        FREE(checkfilename);

    ephem_const_gc();

    /* drop the references held by the run statistics, then the stats. */
    for (i = 0; i < mpop->size + 1; ++i)
    {
        for (j = 0; j < run_stats[i].bestn; ++j)
            --run_stats[i].best[j]->refcount;
        FREE(run_stats[i].best);
    }
    FREE(run_stats);

    saved_individual_gc();
    FREE(saved_head);
}
double getbw(char* name, char* buf, size_t size, int times) { char file[SCR_MAX_FILENAME]; double bw = 0.0; if (times > 0) { /* start the timer */ double time_start = MPI_Wtime(); /* write the checkpoint file */ int i, count = 0; for(i=0; i < times; i++) { int rc; int valid = 0; /* int need_checkpoint; SCR_Need_checkpoint(&need_checkpoint); if (need_checkpoint) { */ /* instruct SCR we are starting the next checkpoint */ SCR_Start_checkpoint(); /* get the file name to write our checkpoint file to */ char newname[SCR_MAX_FILENAME]; sprintf(newname, "timestep.%d/%s", timestep, name); SCR_Route_file(newname, file); /* open the file and write the checkpoint */ int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); if (fd_me > 0) { count++; valid = 1; /* write the checkpoint data */ rc = write_checkpoint(fd_me, timestep, buf, size); if (rc < 0) { valid = 0; printf("%d: Error writing to %s\n", rank, file); } /* force the data to storage */ rc = fsync(fd_me); if (rc < 0) { valid = 0; printf("%d: Error fsync %s\n", rank, file); } /* make sure the close is without error */ rc = close(fd_me); if (rc < 0) { valid = 0; printf("%d: Error closing %s\n", rank, file); } } else { printf("%d: Could not open file %s\n", rank, file); } /* if( valid ) printf("%d: Wrote checkpoint to %s\n", rank, file); */ /* mark this checkpoint as complete */ SCR_Complete_checkpoint(valid); if (rank == 0) { printf("Completed checkpoint %d.\n", timestep); fflush(stdout); } /* } */ /* increase the timestep counter */ timestep++; /* optionally sleep for some time */ if (seconds > 0) { if (rank == 0) { printf("Sleeping for %d seconds... \n", seconds); fflush(stdout); } sleep(seconds); } } /* stop the timer and compute the bandwidth */ double time_end = MPI_Wtime(); bw = ((size * count) / (1024*1024)) / (time_end - time_start); } return bw; }