/**
 * Write a FASTA comment describing the specified path to a stream.
 * The comment is a comma-separated list holding the name of the first
 * vertex, then either the name of the middle vertex (three-vertex
 * path) or "..." (longer path), then the name of the last vertex.
 * @param out the stream to write the comment to
 * @param g the graph used to look up vertex names
 * @param path the path to describe; must hold more than one vertex
 */
static void pathToComment(ostream& out,
		const Graph& g, const ContigPath& path)
{
	assert(path.size() > 1);
	const size_t numVertices = path.size();

	out << get(vertex_name, g, path.front());
	if (numVertices > 3) {
		// Abbreviate the interior of a long path.
		out << ",...";
	} else if (numVertices == 3) {
		out << ',' << get(vertex_name, g, path[1]);
	}
	out << ',' << get(vertex_name, g, path.back());
}
/** Return the length of the specified path in k-mer. */ static unsigned calculatePathLength(const Graph& g, const ContigNode& origin, const ContigPath& path, size_t prefix = 0, size_t suffix = 0) { if (prefix + suffix == path.size()) return 0; assert(prefix + suffix < path.size()); int length = addProp(g, path.begin() + prefix, path.end() - suffix).length; // Account for the overlap on the left. vertex_descriptor u = prefix == 0 ? origin : path[prefix - 1]; length += getDistance(g, u, path[prefix]); assert(length > 0); return length; }
/**
 * Trim the path so that only the elements in [first, last) remain.
 * Every removed range is first passed to recordTrimmedContigs. When
 * the range [first, last) is empty the whole path is recorded and
 * cleared. Finally removeAmbiguousContigs is applied to the path.
 * @param path the path to trim in place
 * @param first index of the first element to keep
 * @param last index one past the last element to keep
 */
static void removeContigs(ContigPath& path,
		unsigned first, unsigned last)
{
	assert(first <= path.size());
	assert(last <= path.size());

	if (first >= last) {
		// Nothing survives the trim.
		recordTrimmedContigs(path.begin(), path.end());
		path.clear();
	} else {
		recordTrimmedContigs(path.begin(), path.begin() + first);
		recordTrimmedContigs(path.begin() + last, path.end());
		// Erase the tail first so that the index first stays valid.
		path.erase(path.begin() + last, path.end());
		path.erase(path.begin(), path.begin() + first);
	}
	removeAmbiguousContigs(path);
}
/**
 * Check whether path starts with the sequence [first, last).
 * @param path the path to test, taken by value so that it may be
 *        reverse complemented locally
 * @param rc when true, path is reverse complemented before the test
 * @param first start of the sequence to look for; must refer to the
 *        same node as the front of the (possibly reverse
 *        complemented) path
 * @param last end of the sequence to look for
 * @return true if the sequence is no longer than path and matches
 *         its leading elements
 */
static bool startsWith(ContigPath path, bool rc,
		ContigPath::const_iterator first,
		ContigPath::const_iterator last)
{
	if (rc)
		reverseComplement(path.begin(), path.end());
	assert(*first == path.front());
	assert(first < last);

	// A sequence longer than the path cannot be its prefix.
	if (unsigned(last - first) > path.size())
		return false;
	return equal(first, last, path.begin());
}
/**
 * Add distances to a path.
 * Copies the path and, between each pair of consecutive nodes whose
 * distance in the graph is non-negative, inserts an 'N' node sized
 * distance + k - 1 (at least 1).
 * @param g the graph supplying the distance between adjacent nodes
 * @param path the path to annotate; may be empty
 * @return the annotated copy of the path; empty if path is empty
 */
static ContigPath addDistance(const Graph& g, const ContigPath& path)
{
	ContigPath out;
	// front() and begin() + 1 are undefined on an empty path.
	if (path.empty())
		return out;

	out.reserve(path.size());
	ContigNode u = path.front();
	out.push_back(u);
	for (ContigPath::const_iterator it = path.begin() + 1;
			it != path.end(); ++it) {
		ContigNode v = *it;
		int distance = getDistance(g, u, v);
		if (distance >= 0) {
			int numN = distance + opt::k - 1; // by convention
			assert(numN >= 0);
			// Never emit a zero-sized 'N' node.
			numN = max(numN, 1);
			out.push_back(ContigNode(numN, 'N'));
		}
		out.push_back(v);
		u = v;
	}
	return out;
}
static void* worker(void* pArg) { WorkerArg& arg = *static_cast<WorkerArg*>(pArg); for (;;) { /** Lock the input stream. */ static pthread_mutex_t inMutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_lock(&inMutex); EstimateRecord er; bool good = (*arg.in) >> er; pthread_mutex_unlock(&inMutex); if (!good) break; // Flip the anterior distance estimates. for (Estimates::iterator it = er.estimates[1].begin(); it != er.estimates[1].end(); ++it) it->first ^= 1; ContigPath path; handleEstimate(*arg.graph, er, true, path); reverseComplement(path.begin(), path.end()); path.push_back(ContigNode(er.refID, false)); handleEstimate(*arg.graph, er, false, path); if (path.size() > 1) { /** Lock the output stream. */ static pthread_mutex_t outMutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_lock(&outMutex); *arg.out << get(g_contigNames, er.refID) << '\t' << path << '\n'; assert(arg.out->good()); pthread_mutex_unlock(&outMutex); } } return NULL; }
/**
 * Return an ambiguous path that agrees with all the given paths.
 * The result is the longest prefix shared by every path, followed,
 * when the paths also share a non-empty suffix, by a single 'N' node
 * and that shared suffix. The 'N' node is sized from the longest
 * path (as ordered by ComparePathLength) with the shared prefix and
 * suffix excluded, plus the right overlap and k - 1.
 * @param g the graph supplying lengths and distances
 * @param origin the vertex preceding every path
 * @param paths the alternative paths; must not be empty
 * @return the ambiguous path; only the shared prefix when no suffix
 *         is shared
 */
static ContigPath constructAmbiguousPath(const Graph &g,
		const ContigNode& origin, const ContigPaths& paths)
{
	assert(!paths.empty());

	// The shared prefix and suffix cannot exceed the shortest path.
	const ContigPath& firstSol = paths.front();
	size_t min_len = firstSol.size();
	for (ContigPaths::const_iterator it = paths.begin() + 1;
			it != paths.end(); ++it)
		min_len = min(min_len, it->size());

	// Count the leading nodes on which every path agrees.
	size_t longestPrefix = 0;
	while (longestPrefix < min_len) {
		const ContigNode& candidate = firstSol[longestPrefix];
		bool agreed = true;
		for (ContigPaths::const_iterator solIter = paths.begin();
				agreed && solIter != paths.end(); ++solIter) {
			if ((*solIter)[longestPrefix] != candidate)
				agreed = false;
		}
		if (!agreed)
			break;
		++longestPrefix;
	}

	// Count the trailing nodes on which every path agrees, without
	// overlapping the shared prefix of the shortest path.
	size_t longestSuffix = 0;
	while (longestSuffix < min_len - longestPrefix) {
		const ContigNode& candidate
			= firstSol[firstSol.size() - longestSuffix - 1];
		bool agreed = true;
		for (ContigPaths::const_iterator solIter = paths.begin();
				agreed && solIter != paths.end(); ++solIter) {
			if ((*solIter)[solIter->size() - longestSuffix - 1]
					!= candidate)
				agreed = false;
		}
		if (!agreed)
			break;
		++longestSuffix;
	}

	// The shared prefix and suffix are copied from the first path.
	ContigPath out;
	out.reserve(longestPrefix + 1 + longestSuffix);
	out.insert(out.end(),
			firstSol.begin(), firstSol.begin() + longestPrefix);

	if (longestSuffix > 0) {
		const ContigPath& longestPath(
				*max_element(paths.begin(), paths.end(),
					ComparePathLength(g, origin)));
		unsigned length = calculatePathLength(g, origin, longestPath,
				longestPrefix, longestSuffix);

		// Account for the overlap on the right.
		int dist = length + getDistance(g,
				longestSuffix == longestPath.size() ? origin
				: *(longestPath.rbegin() + longestSuffix),
				*(longestPath.rbegin() + longestSuffix - 1));

		// Add k-1 because it is the convention.
		int numN = dist + opt::k - 1;
		assert(numN > 0);

		out.push_back(ContigNode(numN, 'N'));
		out.insert(out.end(),
				firstSol.end() - longestSuffix, firstSol.end());
	}
	return out;
}
/**
 * Order two paths by their length as computed by calculatePathLength
 * from m_origin in m_g, breaking ties by the number of elements.
 * @return true if a orders before b
 */
bool operator()(const ContigPath& a, const ContigPath& b) const
{
	const unsigned lenA = calculatePathLength(m_g, m_origin, a);
	const unsigned lenB = calculatePathLength(m_g, m_origin, b);
	if (lenA != lenB)
		return lenA < lenB;
	// Equal lengths: the path with fewer elements orders first.
	return a.size() < b.size();
}
/**
 * Return true if both paths are equal, ignoring ambiguous nodes.
 * The paths must hold the same number of elements, and each pair of
 * corresponding elements must satisfy equalOrBothAmbiguos.
 */
static bool equalIgnoreAmbiguos(const ContigPath& a,
		const ContigPath& b)
{
	// Paths of different sizes never compare equal.
	if (a.size() != b.size())
		return false;
	return equal(a.begin(), a.end(), b.begin(), equalOrBothAmbiguos);
}
/**
 * Find an equivalent region of the two specified paths.
 * Identical paths, and a path2 that occurs contiguously within
 * path1, are resolved directly to path1. Otherwise the paths are
 * aligned around a pivot node, trying each occurrence of the pivot
 * in path1 against its first occurrence in path2 until an alignment
 * yields a non-empty consensus.
 * @param lengths forwarded to the iterator-based align overload
 * @param path1 the first path
 * @param path2 the second path; may be the same object as path1, in
 *        which case the trivial self-alignment is skipped
 * @param pivot the preferred seed; when it is absent from either
 *        path a pivot is chosen by findPivot instead
 * @param[out] orientation the orientation of the alignment
 * @return the consensus sequence, or an empty path if no pivot or
 *         no alignment is found
 */
static ContigPath align(const Lengths& lengths,
		const ContigPath& path1, const ContigPath& path2,
		ContigNode pivot, dir_type& orientation)
{
	if (&path1 == &path2) {
		// Ignore the trivial alignment when aligning a path to
		// itself.
	} else if (path1 == path2) {
		// These two paths are identical.
		orientation = DIR_B;
		return path1;
	} else {
		ContigPath::const_iterator it = search(
				path1.begin(), path1.end(),
				path2.begin(), path2.end());
		if (it != path1.end()) {
			// path2 is subsumed in path1.
			// Determine the orientation of the edge.
			orientation
				= it == path1.begin() ? DIR_R
				: it + path2.size() == path1.end() ? DIR_F
				: DIR_B;
			return path1;
		}
	}

	// Find a suitable pivot.
	if (find(path1.begin(), path1.end(), pivot) == path1.end()
			|| find(path2.begin(), path2.end(), pivot)
				== path2.end()) {
		bool good;
		tie(pivot, good) = findPivot(path1, path2);
		if (!good)
			return ContigPath();
	}
	assert(find(path1.begin(), path1.end(), pivot) != path1.end());

	ContigPath::const_iterator it2 = find(path2.begin(), path2.end(),
			pivot);
	assert(it2 != path2.end());
	if (&path1 != &path2) {
		// The seed must be unique in path2, unless we're aligning a
		// path to itself.
		assert(count(it2+1, path2.end(), pivot) == 0);
	}

	// Try every occurrence of the pivot in path1. Plain find is
	// used because bind2nd is deprecated in C++11 and removed in
	// C++17; it compares each element to the pivot with operator==.
	ContigPath consensus;
	for (ContigPath::const_iterator it1
				= find(path1.begin(), path1.end(), pivot);
			it1 != path1.end();
			it1 = find(it1+1, path1.end(), pivot)) {
		if (&*it1 == &*it2) {
			// We are aligning a path to itself, and this is the
			// trivial alignment, which we'll ignore.
			continue;
		}
		consensus = align(lengths, path1, path2, it1, it2,
				orientation);
		if (!consensus.empty())
			return consensus;
	}
	return consensus;
}