static void findOverlap(const Graph& g, ContigID refID, bool rc, const ContigNode& pair, const DistanceEst& est, OverlapGraph& out) { if (refID == pair.id() || (est.distance >= 0 && !opt::scaffold)) return; ContigNode ref(refID, false); const ContigNode& t = rc ? pair : ref; const ContigNode& h = rc ? ref : pair; if (out_degree(t, g) > 0 || in_degree(h, g) > 0 || edge(t, h, out).second) return; bool mask = false; unsigned overlap = est.distance - (int)allowedError(est.stdDev) <= 0 ? findOverlap(g, t, h, mask) : 0; if (mask && !opt::mask) return; if (overlap > 0 || opt::scaffold) add_edge(t, h, Overlap(est, overlap, mask), out); }
/** Return the path that the vertex u stands for.
 * A vertex that is not a path yields a path holding only u itself.
 * A path vertex yields its entry in paths, reverse complemented when
 * the vertex has the reverse sense.
 */
static ContigPath getPath(const Paths& paths, const ContigNode& u)
{
	if (!isPath(u))
		return ContigPath(1, u);

	// Path vertices are numbered starting at Vertex::s_offset.
	unsigned pathIndex = u.id() - Vertex::s_offset;
	if (u.sense())
		return reverseComplement(paths[pathIndex]);
	return paths[pathIndex];
}
/** Return the sequence of the specified contig node.
 * An ordinary contig yields its stored sequence, reverse complemented
 * when the node has the reverse sense. An ambiguous node yields its
 * ambiguous sequence prefixed by k-1 'N' characters; that sequence is
 * lower-cased first when it is shorter than k.
 */
static Sequence sequence(const Contigs& contigs, const ContigNode& id)
{
	if (!id.ambiguous()) {
		const Sequence& stored = contigs[id.id()].seq;
		if (id.sense())
			return reverseComplement(stored);
		return stored;
	}

	string gap(id.ambiguousSequence());
	if (gap.length() < opt::k)
		transform(gap.begin(), gap.end(), gap.begin(), ::tolower);
	return string(opt::k - 1, 'N') + gap;
}
/** Return the sequence of the specified contig node, read from the
 * global g_contigs.
 * An ordinary contig yields its stored sequence, reverse complemented
 * when the node has the reverse sense. An ambiguous node yields its
 * ambiguous sequence prefixed by k-1 'N' characters; that sequence is
 * lower-cased first when it is shorter than k.
 */
static const Sequence getSequence(ContigNode id)
{
	if (!id.ambiguous()) {
		string stored(g_contigs[id.id()]);
		if (id.sense())
			return reverseComplement(stored);
		return stored;
	}

	string gap(id.ambiguousSequence());
	if (gap.length() < opt::k)
		transform(gap.begin(), gap.end(), gap.begin(), ::tolower);
	return string(opt::k - 1, 'N') + gap;
}
/** Return the sequence of the specified contig, read from the global
 * g_contigs and reverse complemented when the node has the reverse
 * sense.
 */
static string sequence(const ContigNode& id)
{
	const string& stored = g_contigs[id.id()];
	if (id.sense())
		return reverseComplement(stored);
	return stored;
}
/** Return true when the vertex is a path rather than a contig, that
 * is, when its ID is at or beyond Vertex::s_offset.
 */
static bool isPath(const ContigNode& u)
{
	const bool atOrPastOffset = u.id() >= Vertex::s_offset;
	return atOrPastOffset;
}
/** Identify paths subsumed by the specified path.
 * Each contig on the seed path that has a path of its own is aligned
 * against the seed. If the other path turns out to contain the seed,
 * the search restarts with that path as the new seed.
 * @param lengths the contig lengths, passed to align and isCycle
 * @param path1It the seed path
 * @param paths the collection of paths searched by contig
 * @param out [out] cleared on entry; receives the IDs of the paths
 * found to be subsumed by the returned path
 * @param overlaps [out] paths that are found to overlap
 * @return the ID of the subsuming path
 */
static ContigID identifySubsumedPaths(const Lengths& lengths,
		ContigPathMap::const_iterator path1It,
		ContigPathMap& paths,
		set<ContigID>& out,
		set<ContigID>& overlaps)
{
	out.clear();
	ostringstream log;
	ContigID id(path1It->first);
	const ContigPath& path = path1It->second;
	if (gDebugPrint)
		log << get(g_contigNames, ContigNode(id, false))
			<< '\t' << path << '\n';

	for (ContigPath::const_iterator node = path.begin();
			node != path.end(); ++node) {
		ContigNode pivot = *node;
		if (pivot.ambiguous() || pivot.id() == id)
			continue;

		ContigPathMap::iterator found = paths.find(pivot.contigIndex());
		if (found == paths.end())
			continue;

		// Orient a copy of the other path to agree with the pivot.
		ContigPath other = found->second;
		if (pivot.sense())
			reverseComplement(other.begin(), other.end());

		ContigPath merged = align(lengths, path, other, pivot);
		if (merged.empty())
			continue;

		if (equalIgnoreAmbiguos(merged, path)) {
			// The seed contains the other path.
			if (gDebugPrint)
				log << get(g_contigNames, pivot)
					<< '\t' << other << '\n';
			out.insert(found->first);
			continue;
		}

		if (equalIgnoreAmbiguos(merged, other)) {
			// The other path is larger. Use it as the seed. The debug
			// text gathered so far is not printed.
			return identifySubsumedPaths(lengths, found, paths,
					out, overlaps);
		}

		// The individual paths are tested for cycles only when the
		// consensus path is itself a cycle.
		bool seedIsCycle = false;
		bool otherIsCycle = false;
		if (isCycle(lengths, merged)) {
			seedIsCycle = isCycle(lengths, path);
			otherIsCycle = isCycle(lengths, other);
		}

		if (!seedIsCycle && !otherIsCycle) {
			// Either the consensus is not a cycle, or it is a cycle
			// and neither path is. Record both paths as overlapping.
			if (gDebugPrint)
				log << get(g_contigNames, pivot)
					<< '\t' << other << '\n'
					<< "ignored\t" << merged << '\n';
			overlaps.insert(id);
			overlaps.insert(found->first);
			continue;
		}

		// At least one path is a cycle.
		if (gDebugPrint)
			log << get(g_contigNames, pivot)
				<< '\t' << other << '\n'
				<< "cycle\t" << merged << '\n';
		if (seedIsCycle && otherIsCycle)
			out.insert(found->first);
		else if (!seedIsCycle)
			overlaps.insert(id);
		else
			overlaps.insert(found->first);
	}

	cout << log.str();
	return id;
}
/** Return the length of the specified contig in k-mer.
 * An ambiguous node reports its own length; any other node is looked
 * up in lengths by its ID.
 */
static unsigned getLength(const Lengths& lengths, const ContigNode& u)
{
	if (u.ambiguous())
		return u.length();
	return lengths.at(u.id());
}