int runMe(int argc, char* argv[]) { ArgProcessor args(argc, argv); if(args.isArgSet("--help") || (!(args.isArgSet("--reads") && ( args.isArgSet("--kmers") || args.isArgSet("--kmers_from_reads") ) )) ) { cerr << usage(args) << endl << endl; exit(1); } string reads_fasta_file = args.getStringVal("--reads"); bool is_DS = (! args.isArgSet("--SS")); if(args.isArgSet("--kmer_size")) { KMER_SIZE = args.getIntVal("--kmer_size"); if(KMER_SIZE < 20) { cerr << "Error, min kmer size is 20"; exit(2); } } if(args.isArgSet("--monitor")) { IRKE_COMMON::MONITOR = args.getIntVal("--monitor"); } if (args.isArgSet("--num_threads")) { int num_threads = args.getIntVal("--num_threads"); if (num_threads < MAX_THREADS) { omp_set_num_threads(num_threads); } else { // set to max omp_set_num_threads(MAX_THREADS); } } if(omp_get_max_threads() > MAX_THREADS) { omp_set_num_threads(MAX_THREADS); } KmerCounter kcounter (KMER_SIZE, is_DS); if (args.isArgSet("--kmers")) { string kmers_fasta_file = args.getStringVal("--kmers"); populate_kmer_counter_from_kmers(kcounter, kmers_fasta_file); } else { string kmer_read_source_fasta_file = args.getStringVal("--kmers_from_reads"); populate_kmer_counter_from_reads(kcounter, kmer_read_source_fasta_file); } Fasta_reader fasta_reader(reads_fasta_file); bool write_coverage_info = args.isArgSet("--capture_coverage_info"); int start_time = time(NULL); #pragma omp parallel while (true) { if (! fasta_reader.hasNext()) break; int myTid = omp_get_thread_num(); Fasta_entry fe = fasta_reader.getNext(); string sequence = fe.get_sequence(); if(sequence == "") continue; string header = fe.get_header(); vector<unsigned int> kmer_coverage = compute_kmer_coverage(sequence, kcounter); unsigned int median_cov = median_coverage(kmer_coverage); float mean_cov = mean(kmer_coverage); float stdev = stDev(kmer_coverage); float pct_stdev_of_avg = stdev/mean_cov*100; stringstream stats_text; stats_text << median_cov << "\t" << mean_cov << "\t" << stdev << "\t" << pct_stdev_of_avg << "\t" << fe.get_accession(); stats_text << "\tthread:" << myTid; if(write_coverage_info) { // add the coverage info stats_text << "\t"; for (size_t i = 0; i < kmer_coverage.size(); i++) { stats_text<< kmer_coverage[i]; if(i != kmer_coverage.size() - 1) { stats_text<< ","; } } } stats_text << endl; #pragma omp critical { cout << stats_text.str(); } if (mean_cov < 0) { cerr << "ERROR, cannot have negative coverage!!" << endl; exit(1); } } int end_time = time(NULL); cerr << "STATS_GENERATION_TIME: " << (end_time - start_time) << " seconds." << endl; return(0); }
int main(int argc, char* argv[]) { ArgProcessor args(argc, argv); if(args.isArgSet("--help") || (!(args.isArgSet("--reads") && args.isArgSet("--kmers")))) { cerr << usage(args) << endl << endl; exit(1); } string reads_fasta_file = args.getStringVal("--reads"); string kmers_fasta_file = args.getStringVal("--kmers"); bool is_DS = (! args.isArgSet("--SS")); if(args.isArgSet("--kmer_size")) { KMER_SIZE = args.getIntVal("--kmer_size"); if(KMER_SIZE < 20) { cerr << "Error, min kmer size is 20"; exit(2); } } if(args.isArgSet("--monitor")) { IRKE_COMMON::MONITOR = args.getIntVal("--monitor"); } if(omp_get_max_threads() > MAX_THREADS) { omp_set_num_threads(MAX_THREADS); } KmerCounter kcounter (KMER_SIZE, is_DS); populate_kmer_counter(kcounter, kmers_fasta_file); Fasta_reader fasta_reader(reads_fasta_file); ofstream* filewriter = NULL; ofstream* covwriter = NULL; bool write_coverage_info = args.isArgSet("--capture_coverage_info"); while (true) { Fasta_entry fe = fasta_reader.getNext(); string sequence = fe.get_sequence(); if(sequence == "") break; string header = fe.get_header(); vector<unsigned int> kmer_coverage = compute_kmer_coverage(sequence, kcounter); unsigned int median_cov = median_coverage(kmer_coverage); float mean_cov = mean(kmer_coverage); float stdev = stDev(kmer_coverage); float pct_stdev_of_avg = stdev/mean_cov*100; stringstream stats_text; stats_text << median_cov << "\t" << mean_cov << "\t" << stdev << "\t" << pct_stdev_of_avg << "\t" << fe.get_accession(); if(write_coverage_info) { // add the coverage info stats_text << "\t"; for (int i = 0; i < kmer_coverage.size(); i++) { stats_text<< kmer_coverage[i]; if(i != kmer_coverage.size() - 1) { stats_text<< ","; } } } stats_text << endl; cout << stats_text.str(); if (mean_cov < 0) { cerr << "ERROR, cannot have negative coverage!!" << endl; exit(1); } } return(0); }