/**
 * Merge the paths of the contigs named by the specified seed path.
 * The path of the first seed is the starting point. The path of each
 * following seed is aligned against the running result, and replaces
 * it whenever the alignment produces a non-empty consensus.
 * A seed's path is reverse complemented when the seed's sense() is set.
 * @param lengths passed through to align
 * @param paths the path of each contig, looked up by contig index;
 *        must contain an entry for every seed
 * @param seedPath the seeds to merge; must not be empty
 * @return the merged contig path
 */
static ContigPath mergePath(const Lengths& lengths,
		const ContigPathMap& paths, const ContigPath& seedPath)
{
	assert(!seedPath.empty());
	ContigNode prevSeed = seedPath.front();
	ContigPathMap::const_iterator firstIt
		= paths.find(prevSeed.contigIndex());
	assert(firstIt != paths.end());
	ContigPath merged(firstIt->second);
	if (seedPath.front().sense())
		reverseComplement(merged.begin(), merged.end());
	if (opt::verbose > 1)
#pragma omp critical(cout)
		cout << "\n* " << seedPath << '\n'
			<< get(g_contigNames, seedPath.front())
			<< '\t' << merged << '\n';

	ContigPath::const_iterator seedIt = seedPath.begin();
	for (++seedIt; seedIt != seedPath.end(); ++seedIt) {
		ContigNode curSeed = *seedIt;
		ContigPathMap::const_iterator curIt
			= paths.find(curSeed.contigIndex());
		assert(curIt != paths.end());
		ContigPath curPath = curIt->second;
		if (curSeed.sense())
			reverseComplement(curPath.begin(), curPath.end());

		// Pivot on the current seed when the merged path already
		// contains it, and on the previous seed otherwise.
		ContigNode pivot = prevSeed;
		if (find(merged.begin(), merged.end(), curSeed) != merged.end())
			pivot = curSeed;

		ContigPath consensus = align(lengths, merged, curPath, pivot);
		if (!consensus.empty()) {
			merged.swap(consensus);
			if (opt::verbose > 1)
#pragma omp critical(cout)
				cout << get(g_contigNames, curSeed)
					<< '\t' << curPath << '\n'
					<< '\t' << merged << '\n';
		} else {
			// This seed could be removed from the seed path.
			if (opt::verbose > 1)
#pragma omp critical(cout)
				cout << get(g_contigNames, curSeed)
					<< '\t' << curPath << '\n'
					<< "\tinvalid\n";
		}
		prevSeed = curSeed;
	}
	return merged;
}
/**
 * Trim ambiguous contigs from the ends of the path.
 * At most one element is removed from the back and at most one from
 * the front. The back is examined first, so a path holding a single
 * ambiguous contig ends up empty.
 * @param path the path to trim in place
 */
static void removeAmbiguousContigs(ContigPath& path)
{
	if (!path.empty()) {
		if (path.back().ambiguous())
			path.pop_back();
	}
	if (!path.empty()) {
		if (path.front().ambiguous())
			path.erase(path.begin());
	}
}
/**
 * Remove ambiguous edges that overlap by only a small amount.
 * An edge (u,v) is removed when all of the following hold:
 *  - u has at least two out-edges and v has at least two in-edges,
 *  - the last contig of the path of u equals the first contig of the
 *    path of v, and
 *  - paths contains an entry for that shared contig.
 * The edges are collected first and removed in a single call once the
 * traversal is complete. The number removed is reported on cout when
 * opt::verbose > 0, and recorded with addToDb when opt::db is not empty.
 * @param g the path graph to modify
 * @param paths the path of each contig
 */
static void removeSmallOverlaps(PathGraph& g, const ContigPathMap& paths)
{
	typedef graph_traits<PathGraph>::vertex_descriptor Vertex;
	typedef graph_traits<PathGraph>::vertex_iterator VertexIt;
	typedef graph_traits<PathGraph>::edge_descriptor Edge;
	typedef graph_traits<PathGraph>::out_edge_iterator OutEdgeIt;

	vector<Edge> doomed;
	pair<VertexIt, VertexIt> allVertices = vertices(g);
	for (VertexIt vi = allVertices.first;
			vi != allVertices.second; ++vi) {
		Vertex u = *vi;
		if (out_degree(u, g) >= 2) {
			ContigPath upath = getPath(paths, u);
			pair<OutEdgeIt, OutEdgeIt> outgoing = out_edges(u, g);
			for (OutEdgeIt ei = outgoing.first;
					ei != outgoing.second; ++ei) {
				Edge e = *ei;
				Vertex v = target(e, g);
				assert(v != u);
				if (in_degree(v, g) >= 2) {
					ContigPath vpath = getPath(paths, v);
					if (upath.back() == vpath.front()
							&& paths.count(
								upath.back().contigIndex()) > 0)
						doomed.push_back(e);
				}
			}
		}
	}

	// Removal is deferred so the graph is not modified while its
	// vertices and out-edges are being iterated.
	remove_edges(g, doomed.begin(), doomed.end());
	if (opt::verbose > 0)
		cout << "Removed " << doomed.size()
			<< " small overlap edges.\n";
	if (!opt::db.empty())
		addToDb(db, "Edges_removed_small_overlap", doomed.size());
}
/**
 * Write a FASTA comment describing the specified path to a stream.
 * The comment is a comma-separated list of vertex names: the first and
 * last vertices are always written. A path of exactly three vertices
 * also names its middle vertex, and a longer path writes "..." in
 * place of its interior.
 * @param out the stream to write to
 * @param g the graph supplying the vertex names
 * @param path the path to describe; must hold at least two vertices
 */
static void pathToComment(ostream& out,
		const Graph& g, const ContigPath& path)
{
	assert(path.size() > 1);
	const ContigPath::size_type numVertices = path.size();

	out << get(vertex_name, g, path.front());
	if (numVertices > 3)
		out << ",...";
	else if (numVertices == 3)
		out << ',' << get(vertex_name, g, path[1]);
	out << ',' << get(vertex_name, g, path.back());
}
/**
 * Check whether path starts with the sequence [first, last).
 * @param path the path to test; taken by value because it may be
 *        reverse complemented here; must not be empty
 * @param rc whether to reverse complement path before comparing
 * @param first the start of the expected prefix
 * @param last the end of the expected prefix; the range [first, last)
 *        must not be empty
 * @return true if the (possibly reverse complemented) path is at least
 *         as long as [first, last) and begins with those elements
 */
static bool startsWith(ContigPath path, bool rc,
		ContigPath::const_iterator first,
		ContigPath::const_iterator last)
{
	// Validate the preconditions before *first and path.front() are
	// read, so that a violation is reported by an assertion rather
	// than by dereferencing an end iterator or an empty container.
	assert(first < last);
	assert(!path.empty());
	if (rc)
		reverseComplement(path.begin(), path.end());
	assert(*first == path.front());

	// Compare lengths in the container's own size type so that the
	// iterator distance is not narrowed before the comparison.
	const ContigPath::size_type prefixLength
		= static_cast<ContigPath::size_type>(last - first);
	if (prefixLength > path.size())
		return false;
	return equal(first, last, path.begin());
}
/**
 * Merge a sequence of overlapping paths into a single path.
 * The result starts as the path of the first node in merge. For each
 * following node, the leading `overlap` elements of its path must
 * equal the trailing `overlap` elements of the result so far; the
 * remainder of its path is appended.
 * @param paths the path of each node
 * @param overlaps the overlap between consecutive nodes
 * @param merge the nodes whose paths are merged; must not be empty
 * @return the merged path
 */
static ContigPath mergePaths(const Paths& paths,
		const OverlapMap& overlaps, const ContigPath& merge)
{
	assert(!merge.empty());
	ContigPath::const_iterator cur = merge.begin();
	ContigNode prev = *cur;
	ContigPath result(getPath(paths, prev));
	while (++cur != merge.end()) {
		ContigNode next = *cur;
		ContigPath nextPath(getPath(paths, next));
		unsigned shared = getOverlap(overlaps, prev, next);
		assert(result.size() > shared);
		assert(nextPath.size() > shared);
		assert(equal(result.end() - shared, result.end(),
				nextPath.begin()));
		// Skip the shared prefix so it is not duplicated.
		result.insert(result.end(),
				nextPath.begin() + shared, nextPath.end());
		prev = next;
	}
	return result;
}
/**
 * Add distances to a path.
 * Every node of path is copied to the result in order. Between two
 * consecutive nodes u and v whose getDistance(g, u, v) is not
 * negative, a ContigNode(numN, 'N') is inserted, where numN is
 * distance + opt::k - 1, raised to at least 1.
 * @param g the graph supplying the distance between nodes
 * @param path the path to annotate; may be empty
 * @return the path with the gap nodes inserted; empty if path is empty
 */
static ContigPath addDistance(const Graph& g, const ContigPath& path)
{
	ContigPath out;
	// An empty path has no front element to seed the walk with.
	if (path.empty())
		return out;

	out.reserve(path.size());
	ContigNode u = path.front();
	out.push_back(u);
	for (ContigPath::const_iterator it = path.begin() + 1;
			it != path.end(); ++it) {
		ContigNode v = *it;
		int distance = getDistance(g, u, v);
		if (distance >= 0) {
			int numN = distance + opt::k - 1; // by convention
			assert(numN >= 0);
			// Always emit at least one N between the two nodes.
			numN = max(numN, 1);
			out.push_back(ContigNode(numN, 'N'));
		}
		out.push_back(v);
		u = v;
	}
	return out;
}
/**
 * Return whether this path is a cycle.
 * The path is aligned against itself, pivoting on its first node; it
 * is reported as a cycle when that alignment is not empty.
 * @param lengths passed through to align
 * @param path the path to test
 * @return true if aligning the path with itself gives a non-empty result
 */
static bool isCycle(const Lengths& lengths, const ContigPath& path)
{
	const bool alignsWithItself
		= !align(lengths, path, path, path.front()).empty();
	return alignsWithItself;
}