Example #1
0
/**
 * Entry point of "simreads", a program for generating simulated reads
 * with simulated quality scores.
 *
 * The requested number of reads is divided among the input FASTA files
 * in proportion to their file sizes, and each file's share is divided
 * among its sequences in proportion to their lengths. Shares are
 * truncated to whole numbers, so the total may fall slightly short of
 * the requested number. Each sequence is converted to upper case and
 * passed to simreads() together with its share.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments: options followed by FASTA files
 * @return EXIT_SUCCESS after a normal run and after printing the help,
 *         about or missing-option messages; EXIT_FAILURE when an
 *         exception was caught
 */
int
main(int argc, const char **argv) {

  try {

    string prb_file;
    string outfile;
    size_t n_reads = 1000;
    size_t read_width = 25;
    size_t max_errors = 0;
    // NOTE(review): the default seed is the largest size_t; how Runif
    // interprets that value is not visible here -- confirm.
    size_t random_number_seed = numeric_limits<size_t>::max();

    bool VERBOSE = false;
    bool FASTQ_OUTPUT = false;

    /****************** COMMAND LINE OPTIONS ********************/
    static OptionParser opt_parse("simreads",
                                  "program for generating simulated reads"
                                  " with simulated quality scores",
                                  "<fasta-chrom-files>");
    opt_parse.add_opt("output", 'o', "Name of output file (default: stdout)",
                      false , outfile);
    opt_parse.add_opt("reads", 'n', "number of reads to simulate",
                      false, n_reads);
    opt_parse.add_opt("width", 'w', "width of reads to simulate",
                      false, read_width);
    opt_parse.add_opt("err", 'e', "maximum number of simulated sequencing errors",
                      false, max_errors);
    opt_parse.add_opt("verbose", 'v', "print more run info",
                      false, VERBOSE);
    opt_parse.add_opt("fastq", 'q', "write FASTQ format reads",
                      false, FASTQ_OUTPUT);
    opt_parse.add_opt("prob", 'p', "prb output file",
                      false, prb_file);
    opt_parse.add_opt("seed", 'S', "random number seed",
                      false, random_number_seed);
    vector<string> leftover_args;
    opt_parse.parse(argc, argv, leftover_args);
    if (argc == 1 || opt_parse.help_requested()) {
      cerr << opt_parse.help_message() << endl;
      return EXIT_SUCCESS;
    }
    if (opt_parse.about_requested()) {
      cerr << opt_parse.about_message() << endl;
      return EXIT_SUCCESS;
    }
    if (opt_parse.option_missing()) {
      cerr << opt_parse.option_missing_message() << endl;
      return EXIT_SUCCESS;
    }
    if (leftover_args.empty()) {
      cerr << opt_parse.help_message() << endl;
      return EXIT_SUCCESS;
    }
    vector<string> filenames(leftover_args);
    /****************** END COMMAND LINE OPTIONS *****************/

    if (FASTQ_OUTPUT && !prb_file.empty())
      throw SMITHLABException("fastq output is incompatible "
                              "with specifying a prb file");

    const Runif rng(random_number_seed);

    vector<string> reads, read_names;
    vector<vector<vector<double> > > probs;

    // File sizes determine how the reads are shared among the files.
    vector<size_t> filesizes;
    double total = 0;
    for (size_t i = 0; i < filenames.size(); ++i) {
      filesizes.push_back(get_filesize(filenames[i]));
      total += filesizes.back();
    }

    if (VERBOSE)
      cerr << "[OBTAINING READ SAMPLE DISTRIBUTION]";
    vector<size_t> samples;
    for (size_t i = 0; i < filesizes.size(); ++i) {
      // A zero total would produce NaN, and converting NaN to an
      // integer is undefined; give every file zero reads instead.
      const double share = (total > 0) ? n_reads*filesizes[i]/total : 0.0;
      samples.push_back(static_cast<size_t>(share));
    }
    if (VERBOSE) cerr << "[DONE]" << endl;

    // Opening with the default mode creates/truncates the file; the
    // stream is closed again right away. Presumably simreads() reopens
    // the file by name to add its output -- confirm against simreads().
    if (!outfile.empty()) {
      ofstream out(outfile.c_str());
      if (!out)
        throw SMITHLABException("cannot open output file: " + outfile);
    }

    if (!prb_file.empty()) {
      ofstream prb(prb_file.c_str());
      if (!prb)
        throw SMITHLABException("cannot open prb file: " + prb_file);
    }

    for (size_t i = 0; i < filenames.size(); ++i) {
      if (isdir(filenames[i].c_str()))
        throw SMITHLABException("\"" + filenames[i] +
                                "\" not a FASTA format sequence file?");
      if (VERBOSE)
        cerr << "[LOADING=" << filenames[i] << "]";
      vector<string> names, sequences;
      read_fasta_file(filenames[i].c_str(), names, sequences);
      if (VERBOSE) cerr << "[DONE]" << endl;

      // Total length of the sequences in this file. The sequences are
      // indexed by k (the file index i must not be used here).
      double sub_total = 0;
      for (size_t k = 0; k < sequences.size(); ++k)
        sub_total += sequences[k].length();

      // Split this file's share among its sequences by length.
      vector<size_t> sub_samples;
      for (size_t k = 0; k < sequences.size(); ++k) {
        const double share = (sub_total > 0) ?
          samples[i]*sequences[k].length()/sub_total : 0.0;
        sub_samples.push_back(static_cast<size_t>(share));
      }

      for (size_t j = 0; j < names.size(); ++j) {
        if (VERBOSE)
          cerr << "[PROCESSING CHROM=" << names[j] << "]" << endl;
        // Keep only the part of the name before the first ':'.
        const size_t offset = names[j].find(':');
        const string name(names[j].substr(0, offset));

        // toupper() requires a value representable as unsigned char,
        // hence the cast before the call.
        string &seq = sequences[j];
        for (size_t k = 0; k < seq.length(); ++k)
          seq[k] = static_cast<char>(toupper(static_cast<unsigned char>(seq[k])));

        // Sequence j receives its own share, sub_samples[j].
        simreads(FASTQ_OUTPUT, outfile, prb_file,
                 rng, sub_samples[j], read_width, max_errors,
                 name, seq, read_names, reads, probs);
      }
    }
  }
  catch (const std::bad_alloc &ba) {
    cerr << "ERROR: could not allocate memory" << endl;
    return EXIT_FAILURE;
  }
  catch (const SMITHLABException &e) {
    cerr << e.what() << endl;
    return EXIT_FAILURE;
  }
  catch (const std::exception &e) {
    cerr << "ERROR: " << e.what() << endl;
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
}
Example #2
0
/**
 * Entry point of "simreadsbs", a program for generating simulated
 * bisulfite treated reads with simulated quality scores.
 *
 * The requested number of reads is divided among the input FASTA files
 * in proportion to their file sizes (shares are truncated to whole
 * numbers), and simreads_bs() is called once for every sequence of
 * every file.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments: options followed by FASTA files
 * @return EXIT_SUCCESS after a normal run and after printing the help,
 *         about or missing-option messages; EXIT_FAILURE when an
 *         exception was caught
 */
int
main(int argc, const char **argv) {

  try {

    string prb_file;
    string outfile;
    size_t n_reads = 1000;
    size_t read_width = 25;
    size_t max_errors = 0;
    double meth_rate = 0.0;
    double bs_rate = 1.0;
    // NOTE(review): the default seed is the largest size_t; how Runif
    // interprets that value is not visible here -- confirm.
    size_t random_number_seed = numeric_limits<size_t>::max();

    bool VERBOSE = false;
    bool FASTQ_OUTPUT = false;
    bool AG_WILDCARD = false;

    /****************** COMMAND LINE OPTIONS ********************/
    static OptionParser
      opt_parse("simreadsbs",
                "program for generating simulated bisulfite treated reads with "
                "simulated quality scores",
                "<fasta-chrom-files>");
    opt_parse.add_opt("output", 'o', "Name of output file (default: stdout)", false , outfile);
    opt_parse.add_opt("reads", 'n', "number of reads to simulate", false , n_reads);
    opt_parse.add_opt("width", 'w', "width of reads to simulate", false, read_width);
    opt_parse.add_opt("err", 'e', "maximum number of simulated sequencing errors",
                      false, max_errors);
    opt_parse.add_opt("verbose", 'v', "print more run info",
                      false, VERBOSE);
    opt_parse.add_opt("fastq", 'q', "write FASTQ format reads",
                      false, FASTQ_OUTPUT);
    opt_parse.add_opt("prob", 'p', "prb output file", false, prb_file);
    opt_parse.add_opt("meth", 'm', "rate of CpG methylation",
                      false, meth_rate);
    opt_parse.add_opt("bs", 'b', "rate of bisulfite conversion",
                      false, bs_rate);
    opt_parse.add_opt("ag", 'A', "generate A/G wildcard reads",
                      false, AG_WILDCARD);
    opt_parse.add_opt("seed", 'S', "random number seed",
                      false, random_number_seed);
    vector<string> leftover_args;
    opt_parse.parse(argc, argv, leftover_args);
    if (argc == 1 || opt_parse.help_requested()) {
      cerr << opt_parse.help_message() << endl;
      return EXIT_SUCCESS;
    }
    if (opt_parse.about_requested()) {
      cerr << opt_parse.about_message() << endl;
      return EXIT_SUCCESS;
    }
    if (opt_parse.option_missing()) {
      cerr << opt_parse.option_missing_message() << endl;
      return EXIT_SUCCESS;
    }
    if (leftover_args.empty()) {
      cerr << opt_parse.help_message() << endl;
      return EXIT_SUCCESS;
    }
    vector<string> filenames(leftover_args);
    /****************** END COMMAND LINE OPTIONS *****************/

    if (FASTQ_OUTPUT && !prb_file.empty())
      throw SMITHLABException("fastq output is incompatible "
                              "with specifying a prb file");

    const Runif rng(random_number_seed);

    vector<string> reads, read_names;
    vector<vector<vector<double> > > probs;

    // File sizes determine how the reads are shared among the files.
    vector<size_t> filesizes;
    double total = 0;
    for (size_t i = 0; i < filenames.size(); ++i) {
      filesizes.push_back(get_filesize(filenames[i]));
      total += filesizes.back();
    }

    vector<size_t> samples;
    for (size_t i = 0; i < filesizes.size(); ++i) {
      // A zero total would produce NaN, and converting NaN to an
      // integer is undefined; give every file zero reads instead.
      const double share = (total > 0) ? n_reads*filesizes[i]/total : 0.0;
      samples.push_back(static_cast<size_t>(share));
    }

    // Opening with the default mode creates/truncates the file; the
    // stream is closed again right away. Presumably simreads_bs()
    // reopens the file by name to add its output -- confirm against
    // simreads_bs().
    if (!outfile.empty()) {
      ofstream out(outfile.c_str());
      if (!out)
        throw SMITHLABException("cannot open output file: " + outfile);
    }

    if (!prb_file.empty()) {
      ofstream prb(prb_file.c_str());
      if (!prb)
        throw SMITHLABException("cannot open prb file: " + prb_file);
    }

    for (size_t i = 0; i < filenames.size(); ++i) {
      if (VERBOSE)
        cerr << filenames[i] << endl;

      vector<string> names, sequences;
      read_fasta_file(filenames[i].c_str(), names, sequences);

      for (size_t j = 0; j < names.size(); ++j) {
        // Keep only the part of the name before the first ':'.
        const size_t offset = names[j].find(':');
        const string name(names[j].substr(0, offset));
        // NOTE(review): every sequence of file i is given the whole
        // per-file share samples[i], so a file holding several
        // sequences yields more reads than its share -- confirm
        // whether a per-sequence split by length is intended.
        simreads_bs(FASTQ_OUTPUT, AG_WILDCARD,
                    outfile, prb_file,
                    rng, samples[i], read_width, max_errors,
                    bs_rate, meth_rate, name, sequences[j],
                    read_names, reads, probs);
      }
    }

  }
  catch (const std::bad_alloc &ba) {
    cerr << "ERROR: could not allocate memory" << endl;
    return EXIT_FAILURE;
  }
  catch (const SMITHLABException &e) {
    cerr << e.what() << endl;
    return EXIT_FAILURE;
  }
  catch (const std::exception &e) {
    cerr << "ERROR: " << e.what() << endl;
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
}