Ejemplo n.º 1
0
int main(int argc, char* const* argv)
{
    bool die = false;
    for (int c; (c = getopt_long(argc, argv,
                                 shortopts, longopts, NULL)) != -1;) {
        istringstream arg(optarg != NULL ? optarg : "");
        switch (c) {
        case '?':
            die = true;
            break;
        case 'l':
            arg >> opt::k;
            break;
        case 'c':
            arg >> opt::c;
            break;
        case 'd':
            arg >> opt::distPath;
            break;
        case 'f':
            arg >> opt::fragPath;
            break;
        case 'h':
            arg >> opt::histPath;
            break;
        case 'v':
            opt::verbose++;
            break;
        case OPT_HELP:
            cout << USAGE_MESSAGE;
            exit(EXIT_SUCCESS);
        case OPT_VERSION:
            cout << VERSION_MESSAGE;
            exit(EXIT_SUCCESS);
        }
        if (optarg != NULL && !arg.eof()) {
            cerr << PROGRAM ": invalid option: `-"
                 << (char)c << optarg << "'\n";
            exit(EXIT_FAILURE);
        }
    }

    if (opt::k <= 0 && opt::inputFormat == opt::KALIGNER) {
        cerr << PROGRAM ": " << "missing -k,--kmer option\n";
        die = true;
    }

    if (die) {
        cerr << "Try `" << PROGRAM
             << " --help' for more information.\n";
        exit(EXIT_FAILURE);
    }

    if (!opt::fragPath.empty()) {
        fragFile.open(opt::fragPath.c_str());
        assert(fragFile.is_open());
    }

    ReadAlignMap alignTable(1);
    if (optind < argc) {
        for_each(argv + optind, argv + argc,
                 bind2nd(ptr_fun(readAlignmentsFile), &alignTable));
    } else {
        if (opt::verbose > 0)
            cerr << "Reading from standard input..." << endl;
        readAlignments(cin, &alignTable);
    }
    if (opt::verbose > 0)
        cerr << "Read " << stats.alignments << " alignments" << endl;

    unsigned numRF = histogram.count(INT_MIN, 0);
    unsigned numFR = histogram.count(1, INT_MAX);
    size_t sum = alignTable.size()
                 + stats.bothUnaligned + stats.oneUnaligned
                 + numFR + numRF + stats.numFF
                 + stats.numDifferent + stats.numMulti + stats.numSplit;
    cerr <<
         "Mateless   " << percent(alignTable.size(), sum) << "\n"
         "Unaligned  " << percent(stats.bothUnaligned, sum) << "\n"
         "Singleton  " << percent(stats.oneUnaligned, sum) << "\n"
         "FR         " << percent(numFR, sum) << "\n"
         "RF         " << percent(numRF, sum) << "\n"
         "FF         " << percent(stats.numFF, sum) << "\n"
         "Different  " << percent(stats.numDifferent, sum) << "\n"
         "Multimap   " << percent(stats.numMulti, sum) << "\n"
         "Split      " << percent(stats.numSplit, sum) << "\n"
         "Total      " << sum << endl;

    if (!opt::distPath.empty())
        generateDistFile();

    if (!opt::fragPath.empty())
        fragFile.close();

    if (!opt::histPath.empty()) {
        ofstream histFile(opt::histPath.c_str());
        assert(histFile.is_open());
        histFile << histogram;
        assert(histFile.good());
        histFile.close();
    }

    if (numFR < numRF)
        histogram = histogram.negate();
    histogram.eraseNegative();
    histogram.removeNoise();
    histogram.removeOutliers();
    Histogram h = histogram.trimFraction(0.0001);
    if (opt::verbose > 0)
        cerr << "Stats mean: " << setprecision(4) << h.mean() << " "
             "median: " << setprecision(4) << h.median() << " "
             "sd: " << setprecision(4) << h.sd() << " "
             "n: " << h.size() << " "
             "min: " << h.minimum() << " max: " << h.maximum() << '\n'
             << h.barplot() << endl;

    if (stats.numFF > numFR && stats.numFF > numRF) {
        cerr << "error: The mate pairs of this library are oriented "
             "forward-forward (FF), which is not supported by ABySS."
             << endl;
        exit(EXIT_FAILURE);
    }

    return 0;
}
Ejemplo n.º 2
0
/**
 * Entry point: parse the command line, load the fragment-size
 * histogram, and estimate the distances between contigs from the
 * paired alignments.
 *
 * The first positional argument names the histogram file. The
 * optional second one names the alignment file; standard input is
 * read when it is absent or "-". Estimates are written to the file
 * given by the -o option, or to standard output when it is not set.
 *
 * @param argc number of command-line arguments
 * @param argv command-line arguments
 * @return 0 on success. The process exits with EXIT_FAILURE on a usage
 *         error or when the output file cannot be opened.
 */
int main(int argc, char** argv)
{
	bool die = false;
	for (int c; (c = getopt_long(argc, argv,
					shortopts, longopts, NULL)) != -1;) {
		// Wrap the option argument (if any) in a stream so that the
		// check after the switch can detect unconsumed characters.
		istringstream arg(optarg != NULL ? optarg : "");
		switch (c) {
			case '?': die = true; break;
			case OPT_MIND:
				arg >> opt::minDist;
				break;
			case OPT_MAXD:
				arg >> opt::maxDist;
				break;
			case 'l':
				arg >> opt::minAlign;
				break;
			case 'j': arg >> opt::threads; break;
			case 'k': arg >> opt::k; break;
			case 'n': arg >> opt::npairs; break;
			case 'o': arg >> opt::out; break;
			case 'q': arg >> opt::minMapQ; break;
			case 's': arg >> opt::seedLen; break;
			case 'v': opt::verbose++; break;
			case OPT_HELP:
				cout << USAGE_MESSAGE;
				exit(EXIT_SUCCESS);
			case OPT_VERSION:
				cout << VERSION_MESSAGE;
				exit(EXIT_SUCCESS);
		}
		// The extraction above must have consumed the whole argument.
		if (optarg != NULL && !arg.eof()) {
			cerr << PROGRAM ": invalid option: `-"
				<< static_cast<char>(c) << optarg << "'\n";
			exit(EXIT_FAILURE);
		}
	}

	if (opt::k <= 0) {
		cerr << PROGRAM ": missing -k,--kmer option\n";
		die = true;
	}

	if (opt::seedLen <= 0) {
		cerr << PROGRAM ": missing -s,--seed-length option\n";
		die = true;
	}

	if (opt::npairs <= 0) {
		cerr << PROGRAM ": missing -n,--npairs option\n";
		die = true;
	}

	if (argc - optind < 1) {
		cerr << PROGRAM ": missing arguments\n";
		die = true;
	} else if (argc - optind > 2) {
		cerr << PROGRAM ": too many arguments\n";
		die = true;
	}

	if (die) {
		cerr << "Try `" << PROGRAM
			<< " --help' for more information.\n";
		exit(EXIT_FAILURE);
	}

	if (opt::seedLen < 2*opt::k)
		cerr << "warning: the seed-length should be at least twice k:"
			" k=" << opt::k << ", s=" << opt::seedLen << '\n';

	assert(opt::minAlign > 0);

#if _OPENMP
	if (opt::threads > 0)
		omp_set_num_threads(opt::threads);
#endif

	string distanceCountFile(argv[optind++]);
	string alignFile(argv[optind] == NULL ? "-" : argv[optind++]);

	// Open the alignment file only when a real path was given; "-"
	// selects standard input.
	const bool readStdin = alignFile == "-";
	ifstream inFile;
	if (!readStdin) {
		inFile.open(alignFile.c_str());
		assert_good(inFile, alignFile);
	}
	istream& in = readStdin ? cin : inFile;

	ofstream outFile;
	if (!opt::out.empty()) {
		outFile.open(opt::out.c_str());
		// Report the failure explicitly: an assert would be compiled
		// out when NDEBUG is defined and the error would go unnoticed.
		if (!outFile.is_open()) {
			cerr << PROGRAM ": cannot open `"
				<< opt::out << "' for writing\n";
			exit(EXIT_FAILURE);
		}
	}
	ostream& out = opt::out.empty() ? cout : outFile;

	if (opt::format == DOT)
		out << "digraph dist {\ngraph ["
			"k=" << opt::k << " "
			"s=" << opt::seedLen << " "
			"n=" << opt::npairs << "]\n";

	// The fragment size histogram may not be written out until after
	// the alignments complete. Wait for the alignments to complete.
	in.peek();

	// Read the fragment size distribution.
	Histogram distanceHist = loadHist(distanceCountFile);
	unsigned numRF = distanceHist.count(INT_MIN, 0);
	unsigned numFR = distanceHist.count(1, INT_MAX);
	unsigned numTotal = distanceHist.size();
	bool libRF = numFR < numRF;
	if (opt::verbose > 0) {
		cerr << "Mate orientation FR: " << numFR << setprecision(3)
			<< " (" << 100.0f * numFR / numTotal << "%)"
			<< " RF: " << numRF << setprecision(3)
			<< " (" << 100.0f * numRF / numTotal << "%)\n"
			<< "The library " << distanceCountFile << " is oriented "
			<< (libRF
					? "reverse-forward (RF)" : "forward-reverse (FR)")
			<< ".\n";
	}

	// Determine the orientation of the library. An opt::rf of -1 means
	// the detected orientation is used.
	if (opt::rf == -1)
		opt::rf = libRF;
	if (opt::rf)
		distanceHist = distanceHist.negate();
	if (opt::rf != libRF)
		cerr << "warning: The orientation is forced to "
			<< (opt::rf
					? "reverse-forward (RF)" : "forward-reverse (FR)")
			<< " which differs from the detected orientation.\n";

	distanceHist.eraseNegative();
	distanceHist.removeNoise();
	distanceHist.removeOutliers();
	Histogram h = distanceHist.trimFraction(0.0001);
	if (opt::verbose > 0)
		cerr << "Stats mean: " << setprecision(4) << h.mean() << " "
			"median: " << setprecision(4) << h.median() << " "
			"sd: " << setprecision(4) << h.sd() << " "
			"n: " << h.size() << " "
			"min: " << h.minimum() << " max: " << h.maximum() << '\n'
			<< h.barplot() << endl;
	PMF pmf(h);

	// Replace distance limits that still hold the numeric_limits
	// extremes with values derived from k and the PMF.
	if (opt::minDist == numeric_limits<int>::min())
		opt::minDist = -opt::k + 1;
	if (opt::maxDist == numeric_limits<int>::max())
		opt::maxDist = pmf.maxValue();
	if (opt::verbose > 0)
		cerr << "Minimum and maximum distance are set to "
			<< opt::minDist << " and " << opt::maxDist << " bp.\n";
	assert(opt::minDist < opt::maxDist);

	// Read the contig lengths.
	vector<unsigned> contigLens;
	readContigLengths(in, contigLens);
	g_contigNames.lock();

	// Estimate the distances between contigs.
	istream_iterator<SAMRecord> it(in), last;
	if (contigLens.size() == 1) {
		// When mapping to a single contig, no alignments spanning
		// contigs are expected.
		assert(in.eof());
		// Close the graph opened above and return rather than call
		// exit(), which would not destroy outFile and so would leave
		// its buffered output unwritten.
		if (opt::format == DOT)
			out << "}\n";
		return 0;
	}
	assert(in);

	g_recMA = opt::minAlign;
#pragma omp parallel
	for (vector<SAMRecord> records;;) {
		records.clear();
		// Reading from the shared input iterator is serialized.
#pragma omp critical(in)
		readPairs(it, last, records);
		if (records.empty())
			break;
		writeEstimates(out, records, contigLens, pmf);
	}

	if (opt::verbose > 0) {
		// prop_dups is a percentage, so the warning threshold below
		// is 0.5 percent.
		float prop_dups = 100.0f * stats.dup_frags / stats.total_frags;
		cerr << "Duplicate rate of spanning fragments: "
			<< stats.dup_frags << "/"
			<< stats.total_frags << " ("
			<< setprecision(3) << prop_dups << "%)\n";
		if (prop_dups > 0.5)
			cerr << PROGRAM << ": warning: duplicate rate of fragments "
				"spanning more than one contig is high.\n";
	}

	if (opt::verbose > 0 && g_recMA != opt::minAlign)
		cerr << PROGRAM << ": warning: MLE will be more accurate if "
			"l is decreased to " << g_recMA << ".\n";

	assert(in.eof());

	if (opt::format == DOT)
		out << "}\n";
	return 0;
}