int main(int argc, char* const* argv) { bool die = false; for (int c; (c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1;) { istringstream arg(optarg != NULL ? optarg : ""); switch (c) { case '?': die = true; break; case 'l': arg >> opt::k; break; case 'c': arg >> opt::c; break; case 'd': arg >> opt::distPath; break; case 'f': arg >> opt::fragPath; break; case 'h': arg >> opt::histPath; break; case 'v': opt::verbose++; break; case OPT_HELP: cout << USAGE_MESSAGE; exit(EXIT_SUCCESS); case OPT_VERSION: cout << VERSION_MESSAGE; exit(EXIT_SUCCESS); } if (optarg != NULL && !arg.eof()) { cerr << PROGRAM ": invalid option: `-" << (char)c << optarg << "'\n"; exit(EXIT_FAILURE); } } if (opt::k <= 0 && opt::inputFormat == opt::KALIGNER) { cerr << PROGRAM ": " << "missing -k,--kmer option\n"; die = true; } if (die) { cerr << "Try `" << PROGRAM << " --help' for more information.\n"; exit(EXIT_FAILURE); } if (!opt::fragPath.empty()) { fragFile.open(opt::fragPath.c_str()); assert(fragFile.is_open()); } ReadAlignMap alignTable(1); if (optind < argc) { for_each(argv + optind, argv + argc, bind2nd(ptr_fun(readAlignmentsFile), &alignTable)); } else { if (opt::verbose > 0) cerr << "Reading from standard input..." << endl; readAlignments(cin, &alignTable); } if (opt::verbose > 0) cerr << "Read " << stats.alignments << " alignments" << endl; unsigned numRF = histogram.count(INT_MIN, 0); unsigned numFR = histogram.count(1, INT_MAX); size_t sum = alignTable.size() + stats.bothUnaligned + stats.oneUnaligned + numFR + numRF + stats.numFF + stats.numDifferent + stats.numMulti + stats.numSplit; cerr << "Mateless " << percent(alignTable.size(), sum) << "\n" "Unaligned " << percent(stats.bothUnaligned, sum) << "\n" "Singleton " << percent(stats.oneUnaligned, sum) << "\n" "FR " << percent(numFR, sum) << "\n" "RF " << percent(numRF, sum) << "\n" "FF " << percent(stats.numFF, sum) << "\n" "Different " << percent(stats.numDifferent, sum) << "\n" "Multimap " << percent(stats.numMulti, sum) << "\n" "Split " << percent(stats.numSplit, sum) << "\n" "Total " << sum << endl; if (!opt::distPath.empty()) generateDistFile(); if (!opt::fragPath.empty()) fragFile.close(); if (!opt::histPath.empty()) { ofstream histFile(opt::histPath.c_str()); assert(histFile.is_open()); histFile << histogram; assert(histFile.good()); histFile.close(); } if (numFR < numRF) histogram = histogram.negate(); histogram.eraseNegative(); histogram.removeNoise(); histogram.removeOutliers(); Histogram h = histogram.trimFraction(0.0001); if (opt::verbose > 0) cerr << "Stats mean: " << setprecision(4) << h.mean() << " " "median: " << setprecision(4) << h.median() << " " "sd: " << setprecision(4) << h.sd() << " " "n: " << h.size() << " " "min: " << h.minimum() << " max: " << h.maximum() << '\n' << h.barplot() << endl; if (stats.numFF > numFR && stats.numFF > numRF) { cerr << "error: The mate pairs of this library are oriented " "forward-forward (FF), which is not supported by ABySS." << endl; exit(EXIT_FAILURE); } return 0; }
int main(int argc, char** argv) { bool die = false; for (int c; (c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1;) { istringstream arg(optarg != NULL ? optarg : ""); switch (c) { case '?': die = true; break; case OPT_MIND: arg >> opt::minDist; break; case OPT_MAXD: arg >> opt::maxDist; break; case 'l': arg >> opt::minAlign; break; case 'j': arg >> opt::threads; break; case 'k': arg >> opt::k; break; case 'n': arg >> opt::npairs; break; case 'o': arg >> opt::out; break; case 'q': arg >> opt::minMapQ; break; case 's': arg >> opt::seedLen; break; case 'v': opt::verbose++; break; case OPT_HELP: cout << USAGE_MESSAGE; exit(EXIT_SUCCESS); case OPT_VERSION: cout << VERSION_MESSAGE; exit(EXIT_SUCCESS); } if (optarg != NULL && !arg.eof()) { cerr << PROGRAM ": invalid option: `-" << (char)c << optarg << "'\n"; exit(EXIT_FAILURE); } } if (opt::k <= 0) { cerr << PROGRAM ": missing -k,--kmer option\n"; die = true; } if (opt::seedLen <= 0) { cerr << PROGRAM ": missing -s,--seed-length option\n"; die = true; } if (opt::npairs <= 0) { cerr << PROGRAM ": missing -n,--npairs option\n"; die = true; } if (argc - optind < 1) { cerr << PROGRAM ": missing arguments\n"; die = true; } else if (argc - optind > 2) { cerr << PROGRAM ": too many arguments\n"; die = true; } if (die) { cerr << "Try `" << PROGRAM << " --help' for more information.\n"; exit(EXIT_FAILURE); } if (opt::seedLen < 2*opt::k) cerr << "warning: the seed-length should be at least twice k:" " k=" << opt::k << ", s=" << opt::seedLen << '\n'; assert(opt::minAlign > 0); #if _OPENMP if (opt::threads > 0) omp_set_num_threads(opt::threads); #endif string distanceCountFile(argv[optind++]); string alignFile(argv[optind] == NULL ? "-" : argv[optind++]); ifstream inFile(alignFile.c_str()); istream& in(strcmp(alignFile.c_str(), "-") == 0 ? cin : inFile); if (strcmp(alignFile.c_str(), "-") != 0) assert_good(inFile, alignFile); ofstream outFile; if (!opt::out.empty()) { outFile.open(opt::out.c_str()); assert(outFile.is_open()); } ostream& out = opt::out.empty() ? cout : outFile; if (opt::format == DOT) out << "digraph dist {\ngraph [" "k=" << opt::k << " " "s=" << opt::seedLen << " " "n=" << opt::npairs << "]\n"; // The fragment size histogram may not be written out until after // the alignments complete. Wait for the alignments to complete. in.peek(); // Read the fragment size distribution. Histogram distanceHist = loadHist(distanceCountFile); unsigned numRF = distanceHist.count(INT_MIN, 0); unsigned numFR = distanceHist.count(1, INT_MAX); unsigned numTotal = distanceHist.size(); bool libRF = numFR < numRF; if (opt::verbose > 0) { cerr << "Mate orientation FR: " << numFR << setprecision(3) << " (" << (float)100*numFR/numTotal << "%)" << " RF: " << numRF << setprecision(3) << " (" << (float)100*numRF/numTotal << "%)\n" << "The library " << distanceCountFile << " is oriented " << (libRF ? "reverse-forward (RF)" : "forward-reverse (FR)") << ".\n"; } // Determine the orientation of the library. if (opt::rf == -1) opt::rf = libRF; if (opt::rf) distanceHist = distanceHist.negate(); if (opt::rf != libRF) cerr << "warning: The orientation is forced to " << (opt::rf ? "reverse-forward (RF)" : "forward-reverse (FR)") << " which differs from the detected orientation.\n"; distanceHist.eraseNegative(); distanceHist.removeNoise(); distanceHist.removeOutliers(); Histogram h = distanceHist.trimFraction(0.0001); if (opt::verbose > 0) cerr << "Stats mean: " << setprecision(4) << h.mean() << " " "median: " << setprecision(4) << h.median() << " " "sd: " << setprecision(4) << h.sd() << " " "n: " << h.size() << " " "min: " << h.minimum() << " max: " << h.maximum() << '\n' << h.barplot() << endl; PMF pmf(h); if (opt::minDist == numeric_limits<int>::min()) opt::minDist = -opt::k + 1; if (opt::maxDist == numeric_limits<int>::max()) opt::maxDist = pmf.maxValue(); if (opt::verbose > 0) cerr << "Minimum and maximum distance are set to " << opt::minDist << " and " << opt::maxDist << " bp.\n"; assert(opt::minDist < opt::maxDist); // Read the contig lengths. vector<unsigned> contigLens; readContigLengths(in, contigLens); g_contigNames.lock(); // Estimate the distances between contigs. istream_iterator<SAMRecord> it(in), last; if (contigLens.size() == 1) { // When mapping to a single contig, no alignments spanning // contigs are expected. assert(in.eof()); exit(EXIT_SUCCESS); } assert(in); g_recMA = opt::minAlign; #pragma omp parallel for (vector<SAMRecord> records;;) { records.clear(); #pragma omp critical(in) readPairs(it, last, records); if (records.empty()) break; writeEstimates(out, records, contigLens, pmf); } if (opt::verbose > 0) { float prop_dups = (float)100 * stats.dup_frags / stats.total_frags; cerr << "Duplicate rate of spanning fragments: " << stats.dup_frags << "/" << stats.total_frags << " (" << setprecision(3) << prop_dups << "%)\n"; if (prop_dups > 0.5) cerr << PROGRAM << ": warning: duplicate rate of fragments " "spanning more than one contig is high.\n"; } if (opt::verbose > 0 && g_recMA != opt::minAlign) cerr << PROGRAM << ": warning: MLE will be more accurate if " "l is decreased to " << g_recMA << ".\n"; assert(in.eof()); if (opt::format == DOT) out << "}\n"; return 0; }