예제 #1
0
/** Align the specified pair of sequences.
 * @return the number of matches and size of the consensus
 */
static pair<unsigned, unsigned> alignPair(
		const string& seqa, const string& seqb)
{
	NWAlignment alignment;
	unsigned matches = alignGlobal(seqa, seqb, alignment);
	if (opt::verbose > 2)
#pragma omp critical(cerr)
		cerr << alignment;
	return make_pair(matches, alignment.size());
}
예제 #2
0
/* Resolve an ambiguous region by pairwise global alignment
 * (Needleman-Wunsch). 'solutions' must hold exactly two paths that
 * start at the same source contig and end at the same dest contig;
 * only the contigs strictly between those endpoints are compared.
 *
 * Result:
 *  - an empty path if the identity, or the length ratio of the two
 *    merged interior sequences, is below opt::identity;
 *  - one of the two original solutions if both interiors merge to
 *    exactly the same sequence;
 *  - otherwise the path (source, new contig, dest), where the new
 *    contig is written through outputNewContig to 'out'.
 *
 * Diagnostics go to cerr depending on opt::verbose.
 */
static ContigPath alignPair(const Graph& g,
		const ContigPaths& solutions, ofstream& out)
{
	assert(solutions.size() == 2);
	assert(solutions[0].size() > 1);
	assert(solutions[1].size() > 1);
	assert(solutions[0].front() == solutions[1].front());
	assert(solutions[0].back() == solutions[1].back());

	// Drop the shared first and last contig of each solution.
	ContigPath innerA(solutions[0].begin()+1, solutions[0].end()-1);
	ContigPath innerB(solutions[1].begin()+1, solutions[1].end()-1);

	const bool hasEmptyInterior = innerA.empty() || innerB.empty();
	if (hasEmptyInterior) {
		// One solution has nothing between source and dest, so the
		// whole interior of the other one may be a deletion.
		const ContigPath& remaining = innerA.empty() ? innerB : innerA;
		assert(!remaining.empty());
		Sequence merged(mergePath(g, remaining));
		assert(merged.size() > opt::k - 1);

		// Lower-case everything after the first k-1 characters.
		string::iterator tailBegin = merged.begin() + opt::k - 1;
		transform(tailBegin, merged.end(), tailBegin, ::tolower);

		// Only the leading k-1 characters count as matches here.
		unsigned matched = opt::k - 1;
		float ratio = static_cast<float>(matched) / merged.size();
		const bool rejected = ratio < opt::identity;
		if (opt::verbose > 2)
			cerr << merged << '\n';
		if (opt::verbose > 1)
			cerr << ratio << (rejected ? " (too low)\n" : "\n");
		if (rejected)
			return ContigPath();

		unsigned cov = calculatePathProperties(g, remaining).coverage;
		ContigNode merger = outputNewContig(g,
				solutions, 1, 1, merged, cov, out);
		ContigPath resolved;
		resolved.push_back(solutions.front().front());
		resolved.push_back(merger);
		resolved.push_back(solutions.front().back());
		return resolved;
	}

	Sequence seqA(mergePath(g, innerA));
	Sequence seqB(mergePath(g, innerB));
	if (seqA == seqB) {
		// Both interiors spell exactly the same sequence.
		if (innerA.size() != innerB.size()) {
			// Same sequence, different number of contigs: warn and
			// keep the solution whose interior has more contigs
			// (the second one on a tie, which cannot happen here).
			cerr << PROGRAM ": warning: "
				"Two paths have identical sequence, which may be "
				"caused by a transitive edge in the overlap graph.\n"
				<< '\t' << innerA << '\n'
				<< '\t' << innerB << '\n';
			return solutions[innerA.size() > innerB.size() ? 0 : 1];
		}

		// Equal contig counts: a perfect match must be caused by a
		// palindrome. The first differing contigs are expected to be
		// complements of each other and the rest to be equal.
		typedef ContigPath::const_iterator PathIt;
		pair<PathIt, PathIt> diff = mismatch(
				innerA.begin(), innerA.end(), innerB.begin());
		assert(diff.first != innerA.end());
		assert(diff.second != innerB.end());
		assert(*diff.first
				== get(vertex_complement, g, *diff.second));
		assert(equal(diff.first+1, PathIt(innerA.end()),
					diff.second+1));
		if (opt::verbose > 1)
			cerr << "Palindrome: "
				<< get(vertex_contig_name, g, *diff.first) << '\n';
		return solutions[0];
	}

	// Skip the alignment when the lengths alone rule out a
	// sufficient identity.
	unsigned shorter = min(seqA.length(), seqB.length());
	unsigned longer = max(seqA.length(), seqB.length());
	float lengthRatio = static_cast<float>(shorter) / longer;
	if (lengthRatio < opt::identity) {
		if (opt::verbose > 1)
			cerr << shorter << '\t' << longer
				<< '\t' << lengthRatio << "\t(different length)\n";
		return ContigPath();
	}

	NWAlignment nw;
	unsigned matched = alignGlobal(seqA, seqB, nw);
	float ratio = static_cast<float>(matched) / nw.size();
	const bool rejected = ratio < opt::identity;
	if (opt::verbose > 2)
		cerr << nw;
	if (opt::verbose > 1)
		cerr << ratio << (rejected ? " (too low)\n" : "\n");
	if (rejected)
		return ContigPath();

	// The consensus contig carries the coverage of both interiors.
	unsigned cov = calculatePathProperties(g, innerA).coverage
		+ calculatePathProperties(g, innerB).coverage;
	ContigNode merger = outputNewContig(g, solutions, 1, 1,
			nw.consensus(), cov, out);
	ContigPath resolved;
	resolved.push_back(solutions.front().front());
	resolved.push_back(merger);
	resolved.push_back(solutions.front().back());
	return resolved;
}