Beispiel #1
0
static void findOverlap(const Graph& g,
		ContigID refID, bool rc,
		const ContigNode& pair,
		const DistanceEst& est,
		OverlapGraph& out)
{
	if (refID == pair.id()
			|| (est.distance >= 0 && !opt::scaffold))
		return;
	ContigNode ref(refID, false);
	const ContigNode& t = rc ? pair : ref;
	const ContigNode& h = rc ? ref : pair;
	if (out_degree(t, g) > 0 || in_degree(h, g) > 0
			|| edge(t, h, out).second)
		return;

	bool mask = false;
	unsigned overlap
		= est.distance - (int)allowedError(est.stdDev) <= 0
		? findOverlap(g, t, h, mask) : 0;
	if (mask && !opt::mask)
		return;
	if (overlap > 0 || opt::scaffold)
		add_edge(t, h, Overlap(est, overlap, mask), out);
}
Beispiel #2
0
/** Look up the path of a vertex.
 * A vertex that is not a path yields a path built from that vertex
 * alone. A path vertex yields its entry in paths, reverse
 * complemented when the sense flag of the vertex is set.
 */
static ContigPath getPath(const Paths& paths, const ContigNode& u)
{
	if (!isPath(u))
		return ContigPath(1, u);

	// Path vertices are numbered starting at Vertex::s_offset.
	const unsigned index = u.id() - Vertex::s_offset;
	return u.sense() ? reverseComplement(paths[index]) : paths[index];
}
Beispiel #3
0
/** Fetch the sequence of a contig node.
 * An unambiguous node yields the sequence stored in contigs,
 * reverse complemented when the sense flag of the node is set.
 * An ambiguous node yields its ambiguous sequence preceded by
 * opt::k - 1 'N' characters.
 */
static Sequence sequence(const Contigs& contigs, const ContigNode& id)
{
	if (!id.ambiguous()) {
		const Sequence& stored = contigs[id.id()].seq;
		return id.sense() ? reverseComplement(stored) : stored;
	}

	string gap(id.ambiguousSequence());
	// An ambiguous sequence shorter than k is written in lower case.
	if (gap.length() < opt::k) {
		for (string::iterator c = gap.begin(); c != gap.end(); ++c)
			*c = ::tolower(*c);
	}
	return string(opt::k - 1, 'N') + gap;
}
Beispiel #4
0
/** Fetch the sequence of a contig node from g_contigs.
 * An unambiguous node yields its stored sequence, reverse
 * complemented when the sense flag of the node is set. An
 * ambiguous node yields its ambiguous sequence preceded by
 * opt::k - 1 'N' characters.
 */
static const Sequence getSequence(ContigNode id)
{
	if (!id.ambiguous()) {
		string contigSeq(g_contigs[id.id()]);
		if (id.sense())
			return reverseComplement(contigSeq);
		return contigSeq;
	}

	string gap(id.ambiguousSequence());
	// An ambiguous sequence shorter than k is written in lower case.
	if (gap.length() < opt::k) {
		for (string::iterator c = gap.begin(); c != gap.end(); ++c)
			*c = ::tolower(*c);
	}
	return string(opt::k - 1, 'N') + gap;
}
Beispiel #5
0
/** Fetch the sequence of a contig from g_contigs, reverse
 * complemented when the sense flag of the node is set.
 */
static string sequence(const ContigNode& id)
{
	const string& forward = g_contigs[id.id()];
	if (id.sense())
		return reverseComplement(forward);
	return forward;
}
Beispiel #6
0
/** Tell paths apart from contigs: a vertex whose ID is at least
 * Vertex::s_offset is a path, any other vertex is a contig.
 */
static bool isPath(const ContigNode& u)
{
	const bool atOrPastOffset = u.id() >= Vertex::s_offset;
	return atOrPastOffset;
}
/** Find the paths that the given seed path subsumes.
 * Every unambiguous contig of the seed path that has a path of its
 * own in paths is aligned against the seed. When that other path
 * turns out to subsume the seed, the search starts over with the
 * other path as the new seed.
 * @param lengths passed through to align and isCycle
 * @param path1It iterator to the seed path
 * @param paths the paths, looked up by contig index
 * @param out [out] cleared on entry, then filled with the IDs of
 * the subsumed paths
 * @param overlaps [out] paths that are found to overlap
 * @return the ID of the subsuming path
 */
static ContigID identifySubsumedPaths(const Lengths& lengths,
		ContigPathMap::const_iterator path1It,
		ContigPathMap& paths,
		set<ContigID>& out,
		set<ContigID>& overlaps)
{
	// Debug output is buffered and written in one piece at the end.
	ostringstream log;
	out.clear();
	ContigID seedID(path1It->first);
	const ContigPath& seedPath = path1It->second;
	if (gDebugPrint)
		log << get(g_contigNames, ContigNode(seedID, false))
			<< '\t' << seedPath << '\n';

	for (ContigPath::const_iterator it = seedPath.begin();
			it != seedPath.end(); ++it) {
		ContigNode node = *it;
		// Skip ambiguous nodes and the seed contig itself.
		if (node.ambiguous() || node.id() == seedID)
			continue;
		ContigPathMap::iterator otherIt
			= paths.find(node.contigIndex());
		if (otherIt == paths.end())
			continue;

		// Work on a copy oriented the same way as the node.
		ContigPath otherPath = otherIt->second;
		if (node.sense())
			reverseComplement(otherPath.begin(), otherPath.end());

		ContigPath merged = align(lengths, seedPath, otherPath, node);
		if (merged.empty())
			continue;

		if (equalIgnoreAmbiguos(merged, seedPath)) {
			// The seed subsumes the other path.
			if (gDebugPrint)
				log << get(g_contigNames, node)
					<< '\t' << otherPath << '\n';
			out.insert(otherIt->first);
			continue;
		}

		if (equalIgnoreAmbiguos(merged, otherPath)) {
			// The other path is larger. Start over with it as the
			// seed; the output buffered so far is not written.
			return identifySubsumedPaths(lengths, otherIt, paths,
					out, overlaps);
		}

		if (!isCycle(lengths, merged)) {
			// Neither path subsumes the other.
			if (gDebugPrint)
				log << get(g_contigNames, node)
					<< '\t' << otherPath << '\n'
					<< "ignored\t" << merged << '\n';
			overlaps.insert(seedID);
			overlaps.insert(otherIt->first);
			continue;
		}

		// The consensus path is a cycle.
		const bool seedIsCycle = isCycle(lengths, seedPath);
		const bool otherIsCycle = isCycle(lengths, otherPath);
		if (gDebugPrint)
			log << get(g_contigNames, node)
				<< '\t' << otherPath << '\n'
				<< (!seedIsCycle && !otherIsCycle
						? "ignored\t" : "cycle\t")
				<< merged << '\n';
		if (seedIsCycle && otherIsCycle) {
			out.insert(otherIt->first);
		} else {
			// Every path that is not itself a cycle is recorded
			// as overlapping.
			if (!seedIsCycle)
				overlaps.insert(seedID);
			if (!otherIsCycle)
				overlaps.insert(otherIt->first);
		}
	}
	cout << log.str();
	return seedID;
}
/** Return the length of the specified contig in k-mer.
 * An ambiguous node reports its own length; any other node is
 * looked up in lengths by its ID.
 */
static unsigned getLength(const Lengths& lengths,
		const ContigNode& u)
{
	if (u.ambiguous())
		return u.length();
	return lengths.at(u.id());
}