int main(int argc, const char **argv) { try { string prb_file; string seqfile; string outfile; size_t n_reads = 1000; size_t read_width = 25; size_t max_errors = 0; size_t random_number_seed = numeric_limits<size_t>::max(); bool VERBOSE = false; bool FASTQ_OUTPUT = false; /****************** COMMAND LINE OPTIONS ********************/ static OptionParser opt_parse("simreads", "program for generating simulated reads" " with simulated quality scores", "<fasta-chrom-files>"); opt_parse.add_opt("output", 'o', "Name of output file (default: stdout)", false , outfile); opt_parse.add_opt("reads", 'n', "number of reads to simulate", false, n_reads); opt_parse.add_opt("width", 'w', "width of reads to simulate", false, read_width); opt_parse.add_opt("err", 'e', "maximum number of simulated sequencing errors", false, max_errors); opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE); opt_parse.add_opt("fastq", 'q', "write FASTQ format reads", false, FASTQ_OUTPUT); opt_parse.add_opt("prob", 'p', "prb output file", false, prb_file); opt_parse.add_opt("seed", 'S', "random number seed", false, random_number_seed); vector<string> leftover_args; opt_parse.parse(argc, argv, leftover_args); if (argc == 1 || opt_parse.help_requested()) { cerr << opt_parse.help_message() << endl; return EXIT_SUCCESS; } if (opt_parse.about_requested()) { cerr << opt_parse.about_message() << endl; return EXIT_SUCCESS; } if (opt_parse.option_missing()) { cerr << opt_parse.option_missing_message() << endl; return EXIT_SUCCESS; } if (leftover_args.empty()) { cerr << opt_parse.help_message() << endl; return EXIT_SUCCESS; } vector<string> filenames(leftover_args); /****************** END COMMAND LINE OPTIONS *****************/ if (FASTQ_OUTPUT && !prb_file.empty()) throw SMITHLABException("fastq output is incompatible " "with specifying a prb file"); const Runif rng(random_number_seed); vector<string> reads, read_names; vector<vector<vector<double> > > probs; vector<size_t> filesizes; double total = 0; for (size_t i = 0; i < filenames.size(); ++i) { filesizes.push_back(get_filesize(filenames[i])); total += filesizes.back(); } if (VERBOSE) cerr << "[OBTAINING READ SAMPLE DISTRIBUTION]"; vector<size_t> samples; for (size_t i = 0; i < filesizes.size(); ++i) samples.push_back(n_reads*filesizes[i]/total); if (VERBOSE) cerr << "[DONE]" << endl; if (!outfile.empty()) ofstream out(outfile.c_str()); if (!prb_file.empty()) ofstream prb(prb_file.c_str()); for (size_t i = 0; i < filenames.size(); ++i) { if (isdir(filenames[i].c_str())) throw SMITHLABException("\"" + filenames[i] + "\" not a FASTA format sequence file?"); if (VERBOSE) cerr << "[LOADING=" << filenames[i] << "]"; vector<string> names, sequences; read_fasta_file(filenames[i].c_str(), names, sequences); if (VERBOSE) cerr << "[DONE]" << endl; double sub_total = 0; for (size_t k = 0; k < sequences.size(); ++k) sub_total += sequences[i].length(); vector<size_t> sub_samples; for (size_t k = 0; k < sequences.size(); ++k) sub_samples.push_back(samples[i]*sequences[i].length()/sub_total); for (size_t j = 0; j < names.size(); ++j) { if (VERBOSE) cerr << "[PROCESSING CHROM=" << names[j] << "]" << endl; const size_t offset = names[j].find(':'); const string name(names[j].substr(0, offset)); transform(sequences[j].begin(), sequences[j].end(), sequences[j].begin(), std::ptr_fun(&toupper)); simreads(FASTQ_OUTPUT, outfile, prb_file, rng, sub_samples[i], read_width, max_errors, name, sequences[j], read_names, reads, probs); } } } catch (std::bad_alloc &ba) { cerr << "ERROR: could not allocate memory" << endl; return EXIT_FAILURE; } catch (SMITHLABException &e) { cerr << e.what() << endl; return EXIT_FAILURE; } catch (std::exception &e) { cerr << "ERROR: " << e.what() << endl; return EXIT_FAILURE; } return EXIT_SUCCESS; }
int main(int argc, const char **argv) { try { string prb_file; string seqfile; string outfile; size_t n_reads = 1000; size_t read_width = 25; size_t max_errors = 0; double meth_rate = 0.0; double bs_rate = 1.0; size_t random_number_seed = numeric_limits<size_t>::max(); bool VERBOSE = false; bool FASTQ_OUTPUT = false; bool AG_WILDCARD = false; /****************** COMMAND LINE OPTIONS ********************/ static OptionParser opt_parse("simreadsbs", "program for generating simulated bisulfite treated reads with " "simulated quality scores", "<fasta-chrom-files>"); opt_parse.add_opt("output", 'o', "Name of output file (default: stdout)", false , outfile); opt_parse.add_opt("reads", 'n', "number of reads to simulate", false , n_reads); opt_parse.add_opt("width", 'w', "width of reads to simulate", false, read_width); opt_parse.add_opt("err", 'e', "maximum number of simulated sequencing errors", false, max_errors); opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE); opt_parse.add_opt("fastq", 'q', "write FASTQ format reads", false, FASTQ_OUTPUT); opt_parse.add_opt("prob", 'p', "prb output file", false, prb_file); opt_parse.add_opt("meth", 'm', "rate of CpG methylation", false, meth_rate); opt_parse.add_opt("bs", 'b', "rate of bisulfite conversion", false, bs_rate); opt_parse.add_opt("ag", 'A', "generate A/G wildcard reads", false, AG_WILDCARD); opt_parse.add_opt("seed", 'S', "random number seed", false, random_number_seed); vector<string> leftover_args; opt_parse.parse(argc, argv, leftover_args); if (argc == 1 || opt_parse.help_requested()) { cerr << opt_parse.help_message() << endl; return EXIT_SUCCESS; } if (opt_parse.about_requested()) { cerr << opt_parse.about_message() << endl; return EXIT_SUCCESS; } if (opt_parse.option_missing()) { cerr << opt_parse.option_missing_message() << endl; return EXIT_SUCCESS; } if (leftover_args.empty()) { cerr << opt_parse.help_message() << endl; return EXIT_SUCCESS; } vector<string> filenames(leftover_args); /****************** END COMMAND LINE OPTIONS *****************/ if (FASTQ_OUTPUT && !prb_file.empty()) throw SMITHLABException("fastq output is incompatible " "with specifying a prb file"); const Runif rng(random_number_seed); vector<string> reads, read_names; vector<vector<vector<double> > > probs; vector<size_t> filesizes; double total = 0; for (size_t i = 0; i < filenames.size(); ++i) { filesizes.push_back(get_filesize(filenames[i])); total += filesizes.back(); } vector<size_t> samples; for (size_t i = 0; i < filesizes.size(); ++i) samples.push_back(n_reads*filesizes[i]/total); if (!outfile.empty()) ofstream out(outfile.c_str()); if (!prb_file.empty()) ofstream prb(prb_file.c_str()); for (size_t i = 0; i < filenames.size(); ++i) { if (VERBOSE) cerr << filenames[i] << endl; vector<string> names, sequences; read_fasta_file(filenames[i].c_str(), names, sequences); for (size_t j = 0; j < names.size(); ++j) { const size_t offset = names[j].find(':'); const string name(names[j].substr(0, offset)); simreads_bs(FASTQ_OUTPUT, AG_WILDCARD, outfile, prb_file, rng, samples[i], read_width, max_errors, bs_rate, meth_rate, name, sequences[j], read_names, reads, probs); } } } catch (std::bad_alloc &ba) { cerr << "ERROR: could not allocate memory" << endl; return EXIT_FAILURE; } catch (SMITHLABException &e) { cerr << e.what() << endl; return EXIT_FAILURE; } catch (std::exception &e) { cerr << "ERROR: " << e.what() << endl; return EXIT_FAILURE; } return EXIT_SUCCESS; }