Example #1
0
/*
 * f2fs_put_super - release everything hanging off the f2fs_sb_info of @sb.
 *
 * The calls run in a fixed order: optional stats/proc removal, GC thread
 * stop, one write_checkpoint() call, release of the node and meta inodes,
 * destruction of the gc/node/segment managers, and finally freeing of the
 * checkpoint copy, the raw superblock buffer and the f2fs_sb_info itself.
 */
static void f2fs_put_super(struct super_block *sb)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);

#ifdef CONFIG_F2FS_STAT_FS
	/* stats and the per-mount proc entry are only torn down if s_proc is set */
	if (sbi->s_proc) {
		f2fs_stat_exit(sbi);
		remove_proc_entry(sb->s_id, f2fs_proc_root);
	}
#endif
	stop_gc_thread(sbi);

	/* NOTE(review): meaning of the (false, true) flags is not visible here */
	write_checkpoint(sbi, false, true);

	/* drop the references held on the internal node and meta inodes */
	iput(sbi->node_inode);
	iput(sbi->meta_inode);

	/* destroy f2fs internal modules */
	destroy_gc_manager(sbi);
	destroy_node_manager(sbi);
	destroy_segment_manager(sbi);

	kfree(sbi->ckpt);

	/* detach sbi from the super_block before freeing it */
	sb->s_fs_info = NULL;
	brelse(sbi->raw_super_buf);
	kfree(sbi);
}
Example #2
0
/*
 * Save state for the value p.
 *
 * A checkpoint is written whenever checkpoint_opt is set.  The output files
 * are written in every case except when checkpoint_opt and factors_opt are
 * both set.
 */
void write_save_file(uint64_t p)
{
  if (checkpoint_opt) {
    write_checkpoint(p);
  }

  /* Same test as !(checkpoint_opt && factors_opt), spelled per operand. */
  if (!checkpoint_opt || !factors_opt) {
    write_output_files(p);
  }
}
/*
 * f2fs_put_super - release everything hanging off the f2fs_sb_info of @sb.
 *
 * Order of operations: remove the proc entries and delete the kobject,
 * destroy stats and stop the GC thread, conditionally write a checkpoint,
 * release the node/meta inodes, destroy the node and segment managers, then
 * drop the kobject reference and wait on s_kobj_unregister before the
 * f2fs_sb_info is freed.
 */
static void f2fs_put_super(struct super_block *sb)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);

	/* "segment_info" lives under the per-mount proc dir, so it goes first */
	if (sbi->s_proc) {
		remove_proc_entry("segment_info", sbi->s_proc);
		remove_proc_entry(sb->s_id, f2fs_proc_root);
	}
	kobject_del(&sbi->s_kobj);

	f2fs_destroy_stats(sbi);
	stop_gc_thread(sbi);

	/* We don't need to do checkpoint when it's clean */
	/*
	 * NOTE(review): the checkpoint runs only when s_dirty is set AND
	 * get_pages(sbi, F2FS_DIRTY_NODES) is non-zero - confirm that a dirty
	 * superblock without dirty node pages is meant to skip it.
	 */
	if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES))
		write_checkpoint(sbi, true);

	/* drop the references held on the internal node and meta inodes */
	iput(sbi->node_inode);
	iput(sbi->meta_inode);

	/* destroy f2fs internal modules */
	destroy_node_manager(sbi);
	destroy_segment_manager(sbi);

	kfree(sbi->ckpt);
	/* wait for s_kobj_unregister to complete before sbi itself is freed */
	kobject_put(&sbi->s_kobj);
	wait_for_completion(&sbi->s_kobj_unregister);

	sb->s_fs_info = NULL;
	brelse(sbi->raw_super_buf);
	kfree(sbi);
}
Example #4
0
/*
 * f2fs_put_super - release everything hanging off the f2fs_sb_info of @sb.
 *
 * The checkpoint, stats teardown, dirty-inode/discard release and
 * f2fs_leave_shrinker() all run with umount_mutex held; the inode, manager
 * and memory releases follow after the mutex is dropped.
 */
static void f2fs_put_super(struct super_block *sb)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);

	/* "segment_info" lives under the per-mount proc dir, so it goes first */
	if (sbi->s_proc) {
		remove_proc_entry("segment_info", sbi->s_proc);
		remove_proc_entry(sb->s_id, f2fs_proc_root);
	}
	kobject_del(&sbi->s_kobj);

	stop_gc_thread(sbi);

	/* prevent remaining shrinker jobs */
	mutex_lock(&sbi->umount_mutex);

	/*
	 * We don't need to do checkpoint when superblock is clean.
	 * But, the previous checkpoint was not done by umount, it needs to do
	 * clean checkpoint again.
	 */
	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
			!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) {
		struct cp_control cpc = {
			.reason = CP_UMOUNT,
		};
		write_checkpoint(sbi, &cpc);
	}

	/* write_checkpoint can update stat information */
	f2fs_destroy_stats(sbi);

	/*
	 * normally superblock is clean, so we need to release this.
	 * In addition, EIO will cause the checkpoint to be skipped, so we
	 * need this as well.
	 */
	release_dirty_inode(sbi);
	release_discard_addrs(sbi);

	f2fs_leave_shrinker(sbi);
	mutex_unlock(&sbi->umount_mutex);

	/* drop the references held on the internal node and meta inodes */
	iput(sbi->node_inode);
	iput(sbi->meta_inode);

	/* destroy f2fs internal modules */
	destroy_node_manager(sbi);
	destroy_segment_manager(sbi);

	kfree(sbi->ckpt);
	/* wait for s_kobj_unregister to complete before sbi itself is freed */
	kobject_put(&sbi->s_kobj);
	wait_for_completion(&sbi->s_kobj_unregister);

	sb->s_fs_info = NULL;
	brelse(sbi->raw_super_buf);
	kfree(sbi);
}
// MEMBER FUNCTION
/**
 * Checkpoint a single named variable to a stream.
 *
 * Collects the allocations reported by get_alloc_deps_in_allocation() for
 * var_name and hands them to the (stream, dependency-list) overload.
 *
 * @param out_s     stream the checkpoint is written to.
 * @param var_name  name passed to get_alloc_deps_in_allocation().
 */
void Trick::MemoryManager::write_checkpoint( std::ostream& out_s, const char* var_name) {

    std::vector<ALLOC_INFO*> alloc_deps;

    // mm_mutex is held only while the dependency list is being gathered;
    // it is released before delegating to the writing overload.
    pthread_mutex_lock(&mm_mutex);
    get_alloc_deps_in_allocation( alloc_deps, var_name);
    pthread_mutex_unlock(&mm_mutex);

    write_checkpoint( out_s, alloc_deps);
}
// MEMBER FUNCTION
/**
 * Checkpoint to a file.
 *
 * Opens filename for output and forwards to the stream overload.  If the
 * file cannot be opened an MSG_ERROR message is published and nothing is
 * written.
 *
 * @param filename  path of the checkpoint file to create.
 */
void Trick::MemoryManager::write_checkpoint(const char* filename) {

    std::ofstream outfile( filename, std::ios::out);

    if (!outfile.is_open()) {
        message_publish(MSG_ERROR, "Memory Manager ERROR: Couldn't open \"%s\".\n", filename) ;
        return;
    }

    write_checkpoint( outfile);
}
// MEMBER FUNCTION
/**
 * Checkpoint a list of named variables to a file.
 *
 * Opens filename for output and forwards to the (stream, name-list)
 * overload.  If the file cannot be opened an error line is written to
 * std::cerr and nothing else happens.
 *
 * NOTE(review): the single-argument filename overload reports the same
 * failure through message_publish(MSG_ERROR, ...); consider unifying.
 *
 * @param filename       path of the checkpoint file to create.
 * @param var_name_list  names forwarded to the stream overload.
 */
void Trick::MemoryManager::write_checkpoint(const char* filename, std::vector<const char*>& var_name_list) {

    std::ofstream out_s( filename, std::ios::out);

    if (!out_s.is_open()) {
        std::cerr << "ERROR: Couldn't open \""<< filename <<"\"." << std::endl;
        std::cerr.flush();
        return;
    }

    write_checkpoint( out_s, var_name_list);
}
Example #8
0
/*
 * f2fs_sync_fs - write a checkpoint for @sb when there is something to sync.
 *
 * Nothing is done when s_dirty is clear and get_pages() reports no
 * F2FS_DIRTY_NODES pages, or when @sync is zero.  Always returns 0.
 */
int f2fs_sync_fs(struct super_block *sb, int sync)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);

	/* get_pages() is consulted only when s_dirty is clear */
	if (sbi->s_dirty || get_pages(sbi, F2FS_DIRTY_NODES)) {
		if (sync)
			write_checkpoint(sbi, false, false);
	}

	return 0;
}
int f2fs_sync_fs(struct super_block *sb, int sync)
{
    struct f2fs_sb_info *sbi = F2FS_SB(sb);

    trace_f2fs_sync_fs(sb, sync);

    if (sync) {
        struct cp_control cpc = {
            .reason = CP_SYNC,
        };
        mutex_lock(&sbi->gc_mutex);
        write_checkpoint(sbi, &cpc);
        mutex_unlock(&sbi->gc_mutex);
    } else {
// MEMBER FUNCTION
/**
 * Checkpoint a list of named variables to a stream.
 *
 * For every name in var_name_list the allocations reported by
 * get_alloc_deps_in_allocation() are appended to one dependency list, which
 * is then handed to the (stream, dependency-list) overload.
 *
 * @param out_s          stream the checkpoint is written to.
 * @param var_name_list  names to look up, processed in list order.
 */
void Trick::MemoryManager::write_checkpoint( std::ostream& out_s, std::vector<const char*>& var_name_list) {

    std::vector<ALLOC_INFO*> alloc_deps;

    // The element count is sampled once, before the loop starts.
    const int name_count = var_name_list.size();

    for (int idx = 0; idx < name_count; ++idx) {
        const char* current_name = var_name_list[idx];

        // mm_mutex is taken and released around each individual lookup.
        pthread_mutex_lock(&mm_mutex);
        get_alloc_deps_in_allocation(alloc_deps, current_name);
        pthread_mutex_unlock(&mm_mutex);
    }

    write_checkpoint( out_s, alloc_deps);
}
/*
 * f2fs_sync_fs - sync entry point for @sb.
 *
 * With @sync set, write_checkpoint() is called under gc_mutex; otherwise
 * f2fs_balance_fs() is called instead.  Always returns 0.
 */
int f2fs_sync_fs(struct super_block *sb, int sync)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);

	trace_f2fs_sync_fs(sb, sync);

	if (!sync) {
		f2fs_balance_fs(sbi);
		return 0;
	}

	mutex_lock(&sbi->gc_mutex);
	write_checkpoint(sbi, false);
	mutex_unlock(&sbi->gc_mutex);

	return 0;
}
// MEMBER FUNCTION
void Trick::MemoryManager::write_checkpoint( std::ostream& out_s) {

    ALLOC_INFO_MAP::iterator pos;
    ALLOC_INFO* alloc_info;
    std::vector<ALLOC_INFO*> dependencies;

    pthread_mutex_lock(&mm_mutex);
    for ( pos=alloc_info_map.begin() ; pos!=alloc_info_map.end() ; pos++ ) {
        alloc_info = pos->second;
        dependencies.push_back(alloc_info);
    }

    // Sort the dependencies by ALLOC_INFO.id.
    std::sort( dependencies.begin() , dependencies.end() , alloc_info_id_compare) ;
    pthread_mutex_unlock(&mm_mutex);
    write_checkpoint( out_s, dependencies);

}
/*
 * f2fs_sync_fs - sync entry point for @sb.
 *
 * Returns immediately when s_dirty is clear and get_pages() reports no
 * F2FS_DIRTY_NODES pages.  Otherwise, with @sync set, write_checkpoint() is
 * called under gc_mutex; with @sync clear, f2fs_balance_fs() is called.
 * Always returns 0.
 */
int f2fs_sync_fs(struct super_block *sb, int sync)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);

	trace_f2fs_sync_fs(sb, sync);

	/* get_pages() is consulted only when s_dirty is clear */
	if (sbi->s_dirty || get_pages(sbi, F2FS_DIRTY_NODES)) {
		if (!sync) {
			f2fs_balance_fs(sbi);
			return 0;
		}

		mutex_lock(&sbi->gc_mutex);
		write_checkpoint(sbi, false);
		mutex_unlock(&sbi->gc_mutex);
	}

	return 0;
}
/*
 * f2fs_put_super - release everything hanging off the f2fs_sb_info of @sb.
 *
 * Order of operations: remove the proc entries and delete the kobject,
 * destroy stats and stop the GC thread, write a CP_UMOUNT checkpoint if
 * s_dirty is set, release dirty inodes and discard addresses, release the
 * node/meta inodes, destroy the node and segment managers, then drop the
 * kobject reference and wait on s_kobj_unregister before freeing sbi.
 */
static void f2fs_put_super(struct super_block *sb)
{
    struct f2fs_sb_info *sbi = F2FS_SB(sb);

    /* "segment_info" lives under the per-mount proc dir, so it goes first */
    if (sbi->s_proc) {
        remove_proc_entry("segment_info", sbi->s_proc);
        remove_proc_entry(sb->s_id, f2fs_proc_root);
    }
    kobject_del(&sbi->s_kobj);

    f2fs_destroy_stats(sbi);
    stop_gc_thread(sbi);

    /* We don't need to do checkpoint when it's clean */
    if (sbi->s_dirty) {
        struct cp_control cpc = {
            .reason = CP_UMOUNT,
        };
        write_checkpoint(sbi, &cpc);
    }

    /*
     * normally superblock is clean, so we need to release this.
     * In addition, EIO will cause the checkpoint to be skipped, so we
     * need this as well.
     */
    release_dirty_inode(sbi);
    release_discard_addrs(sbi);

    /* drop the references held on the internal node and meta inodes */
    iput(sbi->node_inode);
    iput(sbi->meta_inode);

    /* destroy f2fs internal modules */
    destroy_node_manager(sbi);
    destroy_segment_manager(sbi);

    kfree(sbi->ckpt);
    /* wait for s_kobj_unregister to complete before sbi itself is freed */
    kobject_put(&sbi->s_kobj);
    wait_for_completion(&sbi->s_kobj_unregister);

    sb->s_fs_info = NULL;
    brelse(sbi->raw_super_buf);
    kfree(sbi);
}
Example #15
0
/*
 * f2fs_ioc_write_checkpoint - ioctl handler that triggers a checkpoint.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EROFS when f2fs_readonly() reports
 * the superblock read-only, and 0 after write_checkpoint() has been called
 * under gc_mutex.  @arg is not used.
 */
static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
	struct cp_control cpc;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (f2fs_readonly(sbi->sb))
		return -EROFS;

	/* only .reason is filled in; the other cpc fields are left unset */
	cpc.reason = __get_cp_reason(sbi);

	mutex_lock(&sbi->gc_mutex);
	write_checkpoint(sbi, &cpc);
	mutex_unlock(&sbi->gc_mutex);

	return 0;
}
Example #16
0
/*
 * f2fs_sync_fs - sync entry point for @sb.
 *
 * With @sync set, write_checkpoint() is called under gc_mutex and its
 * result becomes the return value; otherwise 0 is returned.
 * f2fs_trace_ios(NULL, 1) is called in both cases.
 */
int f2fs_sync_fs(struct super_block *sb, int sync)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);
	int ret = 0;

	trace_f2fs_sync_fs(sb, sync);

	if (sync) {
		struct cp_control cpc;

		/* only .reason is filled in; the other cpc fields are left unset */
		cpc.reason = __get_cp_reason(sbi);

		mutex_lock(&sbi->gc_mutex);
		ret = write_checkpoint(sbi, &cpc);
		mutex_unlock(&sbi->gc_mutex);
	}

	f2fs_trace_ios(NULL, 1);
	return ret;
}
Example #17
0
/*
 * f2fs_put_super - release everything hanging off the f2fs_sb_info of @sb.
 *
 * Order of operations: destroy stats, stop the GC thread, write one
 * checkpoint unconditionally, release the node/meta inodes, destroy the
 * node and segment managers, then free the checkpoint copy, the raw
 * superblock buffer and the f2fs_sb_info itself.
 */
static void f2fs_put_super(struct super_block *sb)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);

	f2fs_destroy_stats(sbi);
	stop_gc_thread(sbi);

	/* NOTE(review): meaning of the 'true' flag is not visible here */
	write_checkpoint(sbi, true);

	/* drop the references held on the internal node and meta inodes */
	iput(sbi->node_inode);
	iput(sbi->meta_inode);

	/* destroy f2fs internal modules */
	destroy_node_manager(sbi);
	destroy_segment_manager(sbi);

	kfree(sbi->ckpt);

	/* detach sbi from the super_block before freeing it */
	sb->s_fs_info = NULL;
	brelse(sbi->raw_super_buf);
	kfree(sbi);
}
Example #18
0
// Driver loop of the optimisation: calls DE_eachIter() repeatedly until the
// convergence test below passes or nIters iterations have been run.
//
// Only the process with crr_procID == 0 prints progress, writes checkpoints
// and prints the final summary; every process runs DE_eachIter() and the
// convergence test.
//
//   im, poptree, coldCh, nProcs, crr_procID - forwarded unchanged to
//                                             DE_eachIter() on every iteration.
void MaxPosterior::DE(IM im, popTree* poptree, Chain coldCh, unsigned int nProcs, unsigned int crr_procID)
{
  const bool isRoot = (crr_procID == 0);
  bool converged = false;
  unsigned int iter = 0;

  for (; !converged && iter < nIters; iter++)
    {
      DE_eachIter(im, poptree, coldCh, nProcs, crr_procID);

      // YC 1/12/2015
      // Two criteria are combined to decide convergence.  The first is the
      // same as in IMa2 (absolute spread of the log posterior); the second
      // (spread relative to the maximum) was added because the first is very
      // hard to satisfy when the maximum posterior value is very large.
      // maxDist is reset every iteration; no distance-based criterion is
      // evaluated here.
      // NOTE(review): which overload of abs() is selected depends on headers
      // not visible here - confirm it is a floating-point one.
      maxDist = 0;
      if(logPosteriorMax-logPosteriorMin < pow(10,-4) && abs((logPosteriorMax-logPosteriorMin)/logPosteriorMax) < pow(10,-4))
        {
          converged = true;
        }

      if (isRoot)
        {
          // Progress: a full report every 100th iteration, a dot on the
          // remaining multiples of 10.
          if (iter % 100 == 0)
            {
              std::cout << "\n\niter = " << iter
                        <<": the largest log(posterior) = "<< logPosteriorMax << ", the smallest log(posterior) = " << logPosteriorMin <<"\n";
              std::cout << "Estimates with the largest posterior: " << para_atCrr.row(ID_max) <<"\n";
            }
          else if (iter % 10 == 0)
            {
              std::cout <<".";
            }

          // Periodic checkpoint, enabled for checkpoint modes 1 and 3, on
          // iterations that are a whole multiple of howOften_checkpoint.
          if(checkpoint == 1|| checkpoint==3)
            {
              if(iter - howOften_checkpoint * static_cast<unsigned int>(iter/howOften_checkpoint) ==0)
                {
                  write_checkpoint();
                }
            }
        }
    }

  if (!converged && isRoot)
    {
      std::cout << "\nWarning: the optimization did not converge.\n";
    }

  if (isRoot)
    {
      std::cout << "\nMaximum a posterior estimates: " << para_atCrr.row(ID_max)
                << "\nlog(posterior density) = " << logPosteriorMax
                <<" \n at iteration " << iter
                <<"\n";
    }
}
Example #19
0
/* Write checkpoint and/or trajectory frames as selected by mdof_flags.
 *
 * Stage 1 (all ranks): bring the requested data into the global arrays.
 * With domain decomposition this uses dd_collect_state()/dd_collect_vec();
 * without it, only the non-pointer state entries are copied, and only when
 * a checkpoint is requested.
 *
 * Stage 2 (master rank only): write the checkpoint (MDOF_CPT), the
 * full-precision frame (MDOF_X / MDOF_V / MDOF_F) and the compressed
 * position frame (MDOF_X_COMPRESSED).
 */
void mdoutf_write_to_trajectory_files(FILE *fplog, t_commrec *cr,
                                      gmx_mdoutf_t of,
                                      int mdof_flags,
                                      gmx_mtop_t *top_global,
                                      gmx_int64_t step, double t,
                                      t_state *state_local, t_state *state_global,
                                      rvec *f_local, rvec *f_global)
{
    /* MRS -- these aliases exist to manage the difference between half step
     * and full step velocities, but there must be a better way . . . */
    rvec *v_loc  = state_local->v;
    rvec *v_glob = state_global->v;

    /* ---- Stage 1: collect ---- */
    if (DOMAINDECOMP(cr))
    {
        if (mdof_flags & MDOF_CPT)
        {
            /* A checkpoint needs the complete state. */
            dd_collect_state(cr->dd, state_local, state_global);
        }
        else
        {
            if (mdof_flags & (MDOF_X | MDOF_X_COMPRESSED))
            {
                dd_collect_vec(cr->dd, state_local, state_local->x,
                               state_global->x);
            }
            if (mdof_flags & MDOF_V)
            {
                dd_collect_vec(cr->dd, state_local, v_loc, v_glob);
            }
        }
        if (mdof_flags & MDOF_F)
        {
            dd_collect_vec(cr->dd, state_local, f_local, f_global);
        }
    }
    else if (mdof_flags & MDOF_CPT)
    {
        /* All pointers in state_local are equal to state_global,
         * but we need to copy the non-pointer entries.
         */
        state_global->lambda = state_local->lambda;
        state_global->veta   = state_local->veta;
        state_global->vol0   = state_local->vol0;
        copy_mat(state_local->box, state_global->box);
        copy_mat(state_local->boxv, state_global->boxv);
        copy_mat(state_local->svir_prev, state_global->svir_prev);
        copy_mat(state_local->fvir_prev, state_global->fvir_prev);
        copy_mat(state_local->pres_prev, state_global->pres_prev);
    }

    /* ---- Stage 2: write, master only ---- */
    if (!MASTER(cr))
    {
        return;
    }

    if (mdof_flags & MDOF_CPT)
    {
        /* Flush both TNG files before the checkpoint is written. */
        fflush_tng(of->tng);
        fflush_tng(of->tng_low_prec);
        write_checkpoint(of->fn_cpt, of->bKeepAndNumCPT,
                         fplog, cr, of->eIntegrator, of->simulation_part,
                         of->bExpanded, of->elamstats, step, t, state_global);
    }

    if (mdof_flags & (MDOF_X | MDOF_V | MDOF_F))
    {
        /* Each array is passed only if its flag is set, NULL otherwise. */
        rvec *x_out = (mdof_flags & MDOF_X) ? state_global->x : NULL;
        rvec *v_out = (mdof_flags & MDOF_V) ? v_glob : NULL;
        rvec *f_out = (mdof_flags & MDOF_F) ? f_global : NULL;

        if (of->fp_trn)
        {
            gmx_trr_write_frame(of->fp_trn, step, t, state_local->lambda[efptFEP],
                                state_local->box, top_global->natoms,
                                x_out, v_out, f_out);
            if (gmx_fio_flush(of->fp_trn) != 0)
            {
                gmx_file("Cannot write trajectory; maybe you are out of disk space?");
            }
        }

        gmx_fwrite_tng(of->tng, FALSE, step, t, state_local->lambda[efptFEP],
                       state_local->box,
                       top_global->natoms,
                       x_out, v_out, f_out);
    }

    if (mdof_flags & MDOF_X_COMPRESSED)
    {
        /* When only a subset of the atoms goes to the compressed output a
         * temporary packed copy of their coordinates is needed. */
        const int  is_subset = (of->natoms_x_compressed != of->natoms_global);
        rvec      *xxtc      = NULL;

        if (!is_subset)
        {
            /* All atoms are written: use the global array directly. */
            xxtc = state_global->x;
        }
        else
        {
            int i, j;

            snew(xxtc, of->natoms_x_compressed);
            for (i = 0, j = 0; (i < of->natoms_global); i++)
            {
                if (ggrpnr(of->groups, egcCompressedX, i) == 0)
                {
                    copy_rvec(state_global->x[i], xxtc[j++]);
                }
            }
        }

        if (write_xtc(of->fp_xtc, of->natoms_x_compressed, step, t,
                      state_local->box, xxtc, of->x_compression_precision) == 0)
        {
            gmx_fatal(FARGS, "XTC error - maybe you are out of disk space?");
        }
        gmx_fwrite_tng(of->tng_low_prec,
                       TRUE,
                       step,
                       t,
                       state_local->lambda[efptFEP],
                       state_local->box,
                       of->natoms_x_compressed,
                       xxtc,
                       NULL,
                       NULL);

        if (is_subset)
        {
            sfree(xxtc);
        }
    }
}
Example #20
0
/* Handle every pending event, in a fixed priority order.

   Called (via check_events()) from the top level sieve loops
   (prime_sieve() etc.).  It may tighten any sieving parameters other than
   p_min and p_max.

   For the terminating signals the pattern is always the same: finish the
   sieve with a reason string, restore the default handler, then re-raise
   the signal.
*/
void process_events(uint64_t current_prime)
{
  /* notify_event() sets event_happened last, so it is cleared first here.
     A signal arriving while this function runs may therefore have to wait
     for the next sieve iteration, but it cannot be lost.
  */
  event_happened = 0;

  if (clear_event(initialise_events))
  {
    init_signals();
    init_progress_report(current_prime);
  }

  if (clear_event(sieve_parameters_changed))
  {
    init_progress_report(current_prime);
  }

  if (clear_event(received_sigterm))
  {
    finish_srsieve("SIGTERM was received",current_prime);
    signal(SIGTERM,SIG_DFL);
    raise(SIGTERM);
  }

  if (clear_event(received_sigint))
  {
    finish_srsieve("SIGINT was received",current_prime);
    signal(SIGINT,SIG_DFL);
    raise(SIGINT);
  }

#ifdef SIGHUP
  if (clear_event(received_sighup))
  {
    finish_srsieve("SIGHUP was received",current_prime);
    signal(SIGHUP,SIG_DFL);
    raise(SIGHUP);
  }
#endif

#if HAVE_FORK
  if (clear_event(received_sigpipe))
  {
    finish_srsieve("SIGPIPE was received",current_prime);
    signal(SIGPIPE,SIG_DFL);
    raise(SIGPIPE);
  }

  if (clear_event(received_sigchld))
  {
    finish_srsieve("SIGCHLD was received",current_prime);
    signal(SIGCHLD,SIG_DFL);
    raise(SIGCHLD);
    /* Unlike the other signals, this branch exits explicitly afterwards. */
    exit(EXIT_FAILURE);
  }
#endif

  /* A newly found factor pulls the next progress report forward to now. */
  if (clear_event(factor_found))
  {
    next_report_due = time(NULL);
  }

  if (clear_event(report_due))
  {
    progress_report(current_prime);
  }

  if (clear_event(save_due))
  {
#if SOBISTRATOR_OPT
    if (sobistrator_opt)
    {
      sob_write_checkpoint(current_prime);
    }
#endif
    write_checkpoint(current_prime);
  }
}
Example #21
0
/*
 * Write, read back and verify a checkpoint file `times` times.
 *
 * Each round fills a buffer with init_buffer(), writes it to `file` with
 * write_checkpoint(), reads it back with read_checkpoint(), and aborts the
 * MPI job if the header (rank, step) or the buffer contents do not match.
 * The file is unlinked at the end of every round.  Barriers bracket the
 * whole sequence.
 *
 * file  - path of the checkpoint file (created, then removed, each round)
 * size  - number of payload bytes per checkpoint
 * times - number of write/read/verify rounds; nothing is done when <= 0
 *
 * Uses the file-level variables `rank` and `seconds`.
 */
void checkdata(char* file, size_t size, int times)
{
  char* buf = malloc(size);
  if (buf == NULL && size > 0) {
    /* Without a buffer none of the rounds below can run. */
    printf("FAILED to allocate %zu bytes on rank %d\n", size, rank);  fflush(stdout);
    MPI_Abort(MPI_COMM_WORLD, 1);
    return;
  }

  MPI_Barrier(MPI_COMM_WORLD);

  if (times > 0) {
    int i;
    for(i=0; i < times; i++) {
      int rc;
      int write_ok = 0;
      int read_ok = 0;

      rc = init_buffer(buf, size, rank, i);

      if (rank == 0) {
        printf("Writing checkpoint %d.\n", i);  fflush(stdout);
      }

      /* open the file and write the checkpoint; 0 is a valid descriptor */
      int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
      if (fd_me >= 0) {
        write_ok = 1;

        /* write the checkpoint data */
        rc = write_checkpoint(fd_me, rank, i, buf, size);
        if (rc < 0) {
          write_ok = 0;
        }

        /* force the data to storage */
        rc = fsync(fd_me);
        if (rc < 0) {
          write_ok = 0;
        }

        /* make sure the close is without error */
        rc = close(fd_me);
        if (rc < 0) {
          write_ok = 0;
        }
      }
      if (! write_ok) {
        /* report the failure; the read-back verification below decides
         * whether the run has to be aborted */
        printf("FAILED WRITING checkpoint on rank %d in step %d\n", rank, i);  fflush(stdout);
      }

      if (rank == 0) {
        printf("Completed checkpoint %d.\n", i);  fflush(stdout);
      }

      if (rank == 0) {
        printf("Reading checkpoint %d.\n", i);  fflush(stdout);
      }

      memset(buf, 0, size);

      /* open the file and read the checkpoint back; the header values start
       * out as -1 so that a failed open or read is caught by the header
       * comparison below instead of reading uninitialized memory */
      int read_rank = -1;
      int read_timestep = -1;
      fd_me = open(file, O_RDONLY);
      if (fd_me >= 0) {
        read_ok = 1;

        /* read the checkpoint data */
        rc = read_checkpoint(fd_me, &read_rank, &read_timestep, buf, size);
        if (rc < 0) {
          read_ok = 0;
        }

        /* make sure the close is without error */
        rc = close(fd_me);
        if (rc < 0) {
          read_ok = 0;
        }
      }
      if (! read_ok) {
        printf("FAILED READING checkpoint on rank %d in step %d\n", rank, i);  fflush(stdout);
      }

      if (read_rank != rank || read_timestep != i) {
        printf("INVALID HEADER on rank %d in step %d\n", rank, i);  fflush(stdout);
        MPI_Abort(MPI_COMM_WORLD, 0);
      }

      rc = check_buffer(buf, size, rank, i);
      if (! rc) {
        printf("INVALID DATA on rank %d in step %d\n", rank, i);  fflush(stdout);
        MPI_Abort(MPI_COMM_WORLD, 0);
      }

      if (rank == 0) {
        printf("Verified checkpoint %d.\n", read_timestep);  fflush(stdout);
      }

      /* optionally sleep for some time */
      if (seconds > 0) {
        if (rank == 0) {
          printf("Sleeping for %d seconds... \n", seconds); fflush(stdout);
        }
        sleep(seconds);
      }

      unlink(file);
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  /* free(NULL) is a no-op, so no guard is needed */
  free(buf);
  buf = NULL;

  return;
}
Example #22
0
/*
 * SCR checkpoint API test driver.
 *
 * Usage: test_api_file [filesize times sleep_secs] path_to_stdout
 *
 * Steps:
 *   1. parse arguments, start MPI, redirect stdout to path_to_stdout;
 *   2. time SCR_Init() and report min/max/avg across ranks;
 *   3. give each rank (rank % 4) checkpoint files and try to read back and
 *      verify any existing checkpoint;
 *   4. check that all ranks agree on the timestep;
 *   5. write one untimed "priming" checkpoint, then `times` timed ones,
 *      and report the file I/O bandwidth;
 *   6. release buffers, finalize SCR and MPI.
 *
 * Uses the file-level variables filesize, times, seconds and timestep.
 * Returns 0 on success, non-zero on failure.
 */
int main (int argc, char* argv[])
{
  char *path_to_stdout = NULL;
  int scr_retval;
  /* check that we got an appropriate number of arguments */
  if (argc == 2) {
    path_to_stdout = argv[1];
  }
  else if(argc == 5){
    filesize = (size_t) atol(argv[1]);
    times = atoi(argv[2]);
    seconds = atoi(argv[3]);
    path_to_stdout = argv[4];
  }
  else{
    printf("Usage: test_api_file [filesize times sleep_secs path_to_stdout]\n");
    printf("OR: test_api_file [ path_to_stdout]\n");
    exit(1);
  }
  
  MPI_Init(&argc, &argv);

  int rank = -1, size = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  /* open file for stdout */
  printf("new stdout filename: \"%s\"\n", path_to_stdout);
  fflush(stdout);
  freopen(path_to_stdout, "a+", stdout);
  MPI_Barrier(MPI_COMM_WORLD);

  /* time how long it takes to get through init */
  MPI_Barrier(MPI_COMM_WORLD);

  double init_start = MPI_Wtime();
  if (SCR_Init() != SCR_SUCCESS){
    printf("FAILED INITIALIZING SCR\n");
    fclose(stdout);
    return -1;
  }
  double init_end = MPI_Wtime();
  double secs = init_end - init_start;

  MPI_Barrier(MPI_COMM_WORLD);

  double secsmin, secsmax, secssum;
  MPI_Reduce(&secs, &secsmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  MPI_Reduce(&secs, &secsmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  MPI_Reduce(&secs, &secssum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  if (rank == 0) { printf("Init: Min %8.6f s\tMax %8.6f s\tAvg %8.6f s\n", secsmin, secsmax, secssum/size); }

  MPI_Barrier(MPI_COMM_WORLD);

  /* each rank handles between 0 and 3 checkpoint files */
  int num_files = rank % 4;
  char** files = NULL;
  char** bufs  = NULL;
  size_t* filesizes = NULL;
  char* buf = NULL;
  if (num_files > 0) {
    files = (char**) malloc(num_files * sizeof(char*));
    bufs  = (char**) malloc(num_files * sizeof(char*));
    filesizes = (size_t*) malloc(num_files * sizeof(size_t));
  }

  int i;
  for (i=0; i < num_files; i++) {
    // route our checkpoint file
    char name[256];
    snprintf(name, sizeof(name), "rank_%d.%d.ckpt", rank, i);
    files[i] = strdup(name);
    filesizes[i] = filesize + rank + 2*i;
    bufs[i] = (char*) malloc(filesizes[i]);
  }
  if (num_files > 0) {
    // file sizes grow with i, so the last one is the largest read buffer needed
    buf = (char*) malloc(filesizes[num_files-1]);
  }

  // check each of our checkpoint files
  int found_checkpoint = 1;
  for (i=0; i < num_files; i++) {
    char file[2094];
    scr_retval = SCR_Route_file(files[i], file);
    if (scr_retval != SCR_SUCCESS) {
      printf("%d: failed calling SCR_Route_file(): %d: @%s:%d\n",
             rank, scr_retval, __FILE__, __LINE__
      );
    }
    if (read_checkpoint(file, &timestep, buf, filesizes[i])) {
      // check that contents are good
      if (!check_buffer(buf, filesizes[i], rank + 2*i, timestep)) {
        printf("!!!!CORRUPTION!!!! Rank %d, File %s: Invalid value in buffer\n", rank, file);
        fflush(stdout);
        fclose(stdout);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
      }
    } else {
      found_checkpoint = 0;
    }
  }

  // check that everyone found their checkpoint files ok
  int all_found_checkpoint = 0;
  MPI_Allreduce(&found_checkpoint, &all_found_checkpoint, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
  if (!all_found_checkpoint && rank == 0) {
    printf("At least one rank (perhaps all) did not find its checkpoint\n");
    fflush(stdout);
  }

  // check that everyone is at the same timestep
  int timestep_and, timestep_or;
  int timestep_a, timestep_o;
  if (num_files > 0) {
    timestep_a = timestep;
    timestep_o = timestep;
  } else {
    // ranks without files contribute the identity of each reduction:
    // all bits set for AND, all bits clear for OR
    timestep_a = ~0;
    timestep_o = 0;
  }
  MPI_Allreduce(&timestep_a, &timestep_and, 1, MPI_INT, MPI_BAND, MPI_COMM_WORLD);
  MPI_Allreduce(&timestep_o, &timestep_or,  1, MPI_INT, MPI_BOR,  MPI_COMM_WORLD);
  if (timestep_and != timestep_or) {
    printf("%d: Timesteps don't agree: timestep %d\n", rank, timestep);
    fflush(stdout);
    fclose(stdout);
    return 1;
  }
  timestep = timestep_and;

  // make up some data for the next checkpoint
  for (i=0; i < num_files; i++) {
    init_buffer(bufs[i], filesizes[i], rank + 2*i, timestep);
  }

  timestep++;

  // prime system once before timing
  int t;
  for(t=0; t < 1; t++) {
    int rc;
    int all_valid = 1;
    scr_retval = SCR_Start_checkpoint();
    if (scr_retval != SCR_SUCCESS) {
      printf("%d: failed calling SCR_Start_checkpoint(): %d: @%s:%d\n",
             rank, scr_retval, __FILE__, __LINE__
      );
    }
    for (i=0; i < num_files; i++) {
      int valid = 0;
      char file[2094];
      scr_retval = SCR_Route_file(files[i], file);
      if (scr_retval != SCR_SUCCESS) {
        printf("%d: failed calling SCR_route_file(): %d: @%s:%d\n",
               rank, scr_retval, __FILE__, __LINE__
        );
      }
      // open() reports failure with -1; descriptor 0 is valid
      int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
      if (fd_me >= 0) {
        valid = 1;

        // write the checkpoint
        rc = write_checkpoint(fd_me, timestep, bufs[i], filesizes[i]);
        if (rc < 0) { valid = 0; }

        rc = fsync(fd_me);
        if (rc < 0) { valid = 0; }

        // make sure the close is without error
        rc = close(fd_me);
        if (rc < 0) { valid = 0; }
      }
      if (!valid) { all_valid = 0; }
    }
    scr_retval = SCR_Complete_checkpoint(all_valid);
    if (scr_retval != SCR_SUCCESS) {
      printf("%d: failed calling SCR_Complete_checkpoint(): %d: @%s:%d\n",
             rank, scr_retval, __FILE__, __LINE__
      );
    }
    if (rank == 0) { printf("Completed checkpoint %d.\n", timestep); fflush(stdout); }

    timestep++;
  }
  MPI_Barrier(MPI_COMM_WORLD);

  if (times > 0) {
    // count = number of files this rank opened successfully during timing
    int count = 0;
    double time_start = MPI_Wtime();
    for(t=0; t < times; t++) {
      int rc;
      int all_valid = 1;
      scr_retval = SCR_Start_checkpoint();
      if (scr_retval != SCR_SUCCESS) {
        printf("%d: failed calling SCR_Start_checkpoint(): %d: @%s:%d\n",
               rank, scr_retval, __FILE__, __LINE__
        );
      }
      for (i=0; i < num_files; i++) {
        int valid = 0;
        char file[2094];
        scr_retval = SCR_Route_file(files[i], file);
        if (scr_retval != SCR_SUCCESS) {
          printf("%d: failed calling SCR_Route_file(): %d: @%s:%d\n",
                 rank, scr_retval, __FILE__, __LINE__
          );
        }
        // open() reports failure with -1; descriptor 0 is valid
        int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
        if (fd_me >= 0) {
          count++;
          valid = 1;
          
          // write the checkpoint
          rc = write_checkpoint(fd_me, timestep, bufs[i], filesizes[i]);
          if (rc < 0) { valid = 0; }
          
          rc = fsync(fd_me);
          if (rc < 0) { valid = 0; }
          
          // make sure the close is without error
          rc = close(fd_me);
          if (rc < 0) { valid = 0; }
        }
        if (!valid) { all_valid = 0; }
      }
      scr_retval = SCR_Complete_checkpoint(all_valid);
      if (scr_retval != SCR_SUCCESS) {
        printf("%d: failed calling SCR_Complete_checkpoint(): %d: @%s:%d\n",
               rank, scr_retval, __FILE__, __LINE__
        );
      }
      if (rank == 0) { printf("Completed checkpoint %d.\n", timestep); fflush(stdout); }
      
      timestep++;
      if (seconds > 0) {
        if (rank == 0) { printf("Sleeping for %d seconds... \n", seconds); fflush(stdout); }
        sleep(seconds);
      }
    }
    double time_end = MPI_Wtime();
    // compute the MB count in floating point so that small file sizes are
    // not truncated to zero by integer division
    double bw = ((double) filesize * count / (1024.0 * 1024.0)) / (time_end - time_start);
    
    MPI_Barrier(MPI_COMM_WORLD);
    
    double bwmin, bwmax, bwsum;
    MPI_Reduce(&bw, &bwmin, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    MPI_Reduce(&bw, &bwmax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&bw, &bwsum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0) { printf("FileIO: Min %7.2f MB/s\tMax %7.2f MB/s\tAvg %7.2f MB/s\n", bwmin, bwmax, bwsum/size); }
  }

  if (buf != NULL) { free(buf); buf = NULL; }
  for (i=0; i < num_files; i++) {
    if (bufs[i]  != NULL) { free(bufs[i]);  bufs[i]  = NULL; }
    if (files[i] != NULL) { free(files[i]); files[i] = NULL; }
  }
  if (files     != NULL) { free(files);     files     = NULL; }
  if (bufs      != NULL) { free(bufs);      bufs      = NULL; }
  if (filesizes != NULL) { free(filesizes); filesizes = NULL; }

  scr_retval = SCR_Finalize();
  if (scr_retval != SCR_SUCCESS) {
    printf("%d: failed calling SCR_Finalize(): %d: @%s:%d\n",
           rank, scr_retval, __FILE__, __LINE__
    );
  }
  MPI_Finalize();

  fclose(stdout);
  return 0;
}
Example #23
0
/*
 * run_gp()
 *
 * Drives the evolutionary run: for each generation from startgen while
 * gen < "max_generations" and the termination flag is clear, it evaluates
 * every subpopulation, reports statistics, optionally writes a checkpoint,
 * optionally exchanges individuals between subpopulations, and breeds the
 * next population.
 *
 *   mpop                 the multipopulation being evolved (modified in
 *                        place: mpop->pop[i] is replaced after breeding).
 *   startgen             generation number to start counting from.
 *   t_eval, t_breed      accumulators that receive the time spent in
 *                        evaluation and breeding respectively.
 *   startfromcheckpoint  nonzero when resuming; in that case the run
 *                        statistics and saved-individual list are not
 *                        (re)allocated here, and evaluation is skipped for
 *                        the first generation of the loop.
 *
 * On return the run statistics, the saved-individual list head and the
 * checkpoint filename buffer have been freed.
 */
void run_gp(multipop *mpop, int startgen, event *t_eval, event *t_breed,
		int startfromcheckpoint) {
	char *param;
	int gen;
	int maxgen;
	/* only read when mpop->size > 1, in which case it is set below. */
	int exch_gen = 0;
	int i, j;
	int checkinterval;
	char *checkfileformat;
	char *checkfilename = NULL;
	size_t checkfilename_size = 0;
	event start, end, diff;
	int term = 0;
	int stt_interval;
	int bestn;

	termination_override = 0;

	if (!startfromcheckpoint) {

		/* get the number of top individuals to track.  a missing parameter
		 is treated like an invalid one and falls back to 1. */
		param = get_parameter("output.bestn");
		bestn = (param != NULL) ? atoi(param) : 0;
		if (bestn < 1) {
			error( E_WARNING,
					"\"output.bestn\" must be at least 1.  defaulting to 1.");
			bestn = 1;
		}

		/* allocate statistics for overall run: one slot per subpopulation
		 plus one extra. */
		run_stats = (popstats *) MALLOC((mpop->size + 1) * sizeof(popstats));
		for (i = 0; i < mpop->size + 1; ++i) {
			run_stats[i].bestn = bestn;
			run_stats[i].size = -1;
		}

		/* initialize the linked list of saved individuals. */
		saved_head = (saved_ind *) MALLOC(sizeof(saved_ind));
		saved_head->ind = NULL;
		saved_head->refcount = 0;
		saved_head->next = NULL;
		saved_tail = saved_head;
	}

	/* get the maximum number of generations. */
	param = get_parameter("max_generations");
	if (param == NULL)
		error( E_FATAL_ERROR, "no value specified for \"max_generations\".");
	maxgen = atoi(param);
	if (maxgen <= 0)
		error( E_FATAL_ERROR, "\"max_generations\" must be greater than zero.");

	/* get the interval for subpopulation exchanges, if there is more than
	 one subpopulation. */
	if (mpop->size > 1) {
		param = get_parameter("multiple.exch_gen");
		if (param == NULL)
			error( E_FATAL_ERROR,
					"no value specified for \"multiple.exch_gen\".");
		exch_gen = atoi(param);
		if (exch_gen <= 0)
			error( E_FATAL_ERROR,
					"\"multiple.exch_gen\" must be greater than zero.");
	}

	/* get the interval for doing checkpointing. */
	param = get_parameter("checkpoint.interval");
	if (param == NULL)
		/* checkpointing disabled. */
		checkinterval = -1;
	else
		checkinterval = atoi(param);

	/* get the format string for the checkpoint filenames.  it is only
	 required when checkpointing is enabled; the buffer leaves 50 bytes of
	 room for the expanded generation number. */
	checkfileformat = get_parameter("checkpoint.filename");
	if (checkfileformat == NULL) {
		if (checkinterval >= 0)
			error( E_FATAL_ERROR,
					"no value specified for \"checkpoint.filename\".");
	} else {
		checkfilename_size = strlen(checkfileformat) + 50;
		checkfilename = (char *) MALLOC(checkfilename_size);
	}

	/* get the interval for writing information to the .stt file. */
	param = get_parameter("output.stt_interval");
	if (param == NULL)
		error( E_FATAL_ERROR,
				"no value specified for \"output.stt_interval\".");
	stt_interval = atoi(param);
	if (stt_interval < 1)
		error( E_FATAL_ERROR,
				"\"output.stt_interval\" must be greater than zero.");

	oputs( OUT_SYS, 10, "\n\nstarting evolution.\n");

	/* print out how often we'll be doing checkpointing. */
	if (checkinterval > 0)
		oprintf( OUT_SYS, 20,
				"checkpointing will be done every %d generations and "
						"after the last generation.\n", checkinterval);
	else if (checkinterval == 0)
		oprintf( OUT_SYS, 20, "checkpointing will be done only after the last "
				"generation.\n");
	else
		oprintf( OUT_SYS, 20, "no checkpointing will be done.\n");

	/* the big loop. */
	for (gen = startgen; gen < maxgen && !term; ++gen) {
		oprintf( OUT_SYS, 20, "=== generation %d.\n", gen);
		generation_No = gen;
		/* unless this is the first generation after loading a checkpoint
		 file... */
		if (!(startfromcheckpoint && gen == startgen)) {

			/* evaluate the population. */
			event_mark(&start);
			for (i = 0; i < mpop->size; ++i) {
				evaluate_pop(mpop->pop[i]);
			}
			event_mark(&end);
			event_diff(&diff, &start, &end);

#ifdef TIMING_AVAILABLE
			oprintf( OUT_SYS, 40, "    evaluation complete.  (%s)\n",
					event_string(&diff));
#else
			oprintf ( OUT_SYS, 40, "    evaluation complete.\n" );
#endif

			event_accum(t_eval, &diff);

			/* calculate and print statistics.  returns 1 if user termination
			 criterion was met, 0 otherwise.  the flag is acted on below
			 (checkpoint, no breeding) and by the loop condition. */
			term = generation_information(gen, mpop, stt_interval,
					run_stats[0].bestn);
			flush_output_streams();

		}

		/** write a checkpoint file if checkinterval is non-negative and:
		 we've reached the last generation, or
		 the user termination criterion has been met, or
		 we've reached the specified checkpoint interval. **/
		/* NOTE(review): the loop only runs while gen < maxgen, so the
		 "gen == maxgen" test here (and "gen != maxgen" below) can never
		 fire inside the loop body; confirm whether "maxgen - 1" was
		 intended before relying on an end-of-run checkpoint. */
		if (checkinterval >= 0
				&& (gen == maxgen || term
						|| (checkinterval > 0 && gen > startgen
								&& (gen % checkinterval) == 0))) {
			/* bounded formatting: the format string comes from the parameter
			 database, so its expansion may not fit the buffer. */
			int len = snprintf(checkfilename, checkfilename_size,
					checkfileformat, gen);
			if (len < 0) {
				error( E_WARNING,
						"could not format checkpoint filename; checkpoint skipped.");
			} else {
				if ((size_t) len >= checkfilename_size)
					error( E_WARNING,
							"checkpoint filename too long; it has been truncated.");
				write_checkpoint(gen, mpop, checkfilename);
			}
		}

		/** if this is not the last generation and the user criterion hasn't
		 been met, then do breeding. **/
		if (gen != maxgen && !term) {

			/** exchange subpops if it's time. **/
			if (mpop->size > 1 && gen && (gen % exch_gen) == 0) {
				exchange_subpopulations(mpop);
				oprintf( OUT_SYS, 10, "    subpopulation exchange complete.\n");
			}

			/* breed the new population. */
			event_mark(&start);
			for (i = 0; i < mpop->size; ++i)
				mpop->pop[i] = change_population(mpop->pop[i], mpop->bpt[i]);
			event_mark(&end);
			event_diff(&diff, &start, &end);

			/* call the application end-of-breeding callback. */
			app_end_of_breeding(gen, mpop);

#ifdef TIMING_AVAILABLE
			oprintf( OUT_SYS, 30, "    breeding complete.    (%s)\n",
					event_string(&diff));
#else
			oprintf ( OUT_SYS, 30, "    breeding complete.\n" );
#endif

			event_accum(t_breed, &diff);

		}

		/* free unused ERCs. */
		ephem_const_gc();

		flush_output_streams();

	}

	/** free up a lot of stuff before returning. */

	if (checkfilename)
		FREE(checkfilename);

	ephem_const_gc();

	/* drop the references the run statistics hold on saved individuals,
	 then release the statistics themselves. */
	for (i = 0; i < mpop->size + 1; ++i) {
		for (j = 0; j < run_stats[i].bestn; ++j)
			--run_stats[i].best[j]->refcount;
		FREE(run_stats[i].best);
	}
	FREE(run_stats);

	saved_individual_gc();
	FREE(saved_head);
}
Example #24
0
/*
 * Writes `times` checkpoints of `size` bytes from `buf` and returns the
 * achieved write bandwidth.
 *
 * For each iteration the function starts an SCR checkpoint, asks SCR where
 * the file "timestep.<timestep>/<name>" should be written, writes and fsyncs
 * it, reports the outcome to SCR, increments the global timestep and, when
 * the global `seconds` is positive, sleeps for that long.
 *
 *   name   file name of the checkpoint within its timestep directory
 *   buf    data to write
 *   size   number of bytes in buf
 *   times  number of checkpoints to write; values <= 0 write nothing
 *
 * Returns the bandwidth in MiB per second, counting only files that were
 * successfully opened and measured over the whole loop (including any sleep
 * time), or 0.0 when times <= 0.
 */
double getbw(char* name, char* buf, size_t size, int times)
{
  char file[SCR_MAX_FILENAME];
  double bw = 0.0;

  if (times > 0) {
    /* start the timer */
    double time_start = MPI_Wtime();

    /* write the checkpoint files; count tracks how many were opened */
    int i, count = 0;
    for (i = 0; i < times; i++) {
      int rc;
      int valid = 0;

      /* instruct SCR we are starting the next checkpoint */
      SCR_Start_checkpoint();

      /* build the file name, refusing names that do not fit the buffer */
      char newname[SCR_MAX_FILENAME];
      int name_len = snprintf(newname, sizeof(newname), "timestep.%d/%s", timestep, name);
      if (name_len < 0 || (size_t) name_len >= sizeof(newname)) {
        printf("%d: Checkpoint file name too long for %s\n", rank, name);
      }
      else {
        /* get the path SCR wants us to write our checkpoint file to */
        SCR_Route_file(newname, file);

        /* open the file and write the checkpoint; open() reports failure
         * with -1, so descriptor 0 must be accepted as valid */
        int fd_me = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
        if (fd_me >= 0) {
          count++;
          valid = 1;

          /* write the checkpoint data */
          rc = write_checkpoint(fd_me, timestep, buf, size);
          if (rc < 0) {
            valid = 0;
            printf("%d: Error writing to %s\n", rank, file);
          }

          /* force the data to storage */
          rc = fsync(fd_me);
          if (rc < 0) {
            valid = 0;
            printf("%d: Error fsync %s\n", rank, file);
          }

          /* make sure the close is without error */
          rc = close(fd_me);
          if (rc < 0) {
            valid = 0;
            printf("%d: Error closing %s\n", rank, file);
          }
        }
        else {
          printf("%d: Could not open file %s\n", rank, file);
        }
      }

      /* mark this checkpoint as complete, telling SCR whether it is valid */
      SCR_Complete_checkpoint(valid);
      if (rank == 0) {
        printf("Completed checkpoint %d.\n", timestep);
        fflush(stdout);
      }

      /* increase the timestep counter */
      timestep++;

      /* optionally sleep for some time */
      if (seconds > 0) {
        if (rank == 0) { printf("Sleeping for %d seconds... \n", seconds); fflush(stdout); }
        sleep(seconds);
      }
    }

    /* stop the timer and compute the bandwidth; the arithmetic is done in
     * double so that sub-MiB amounts are not truncated to zero */
    double time_end = MPI_Wtime();
    bw = (((double) size * (double) count) / (1024.0 * 1024.0)) / (time_end - time_start);
  }

  return bw;
}