/**
 * Flag every contig that occurs in any of the given paths.
 *
 * For each node of each path, the element of `seen` at the node's
 * id() is set to true. Nodes whose id() is not a valid index into
 * `seen` are skipped. Elements of `seen` are never reset to false.
 *
 * @param seen  flags indexed by contig id, updated in place
 * @param paths the paths whose nodes are to be flagged
 */
static void seenContigs(vector<bool>& seen, const ContigPaths& paths)
{
	typedef ContigPaths::const_iterator PathIt;
	typedef ContigPath::const_iterator NodeIt;

	for (PathIt path = paths.begin(); path != paths.end(); ++path) {
		for (NodeIt node = path->begin(); node != path->end(); ++node) {
			// Ignore nodes that have no slot in the flag vector.
			if (!(node->id() < seen.size()))
				continue;
			seen[node->id()] = true;
		}
	}
}
/**
 * Flag the contigs that are scheduled for removal.
 *
 * An empty path signals that the contig named by the corresponding
 * entry of `pathIDs` should be removed. The name is looked up in
 * g_contigNames and the resulting index is set to true in `marked`.
 * Non-empty paths are ignored.
 *
 * @param marked  removal flags, updated in place; the looked-up
 *                index is asserted to be within its bounds
 * @param pathIDs the name of each path, parallel to `paths`
 * @param paths   the paths to inspect
 */
static void markRemovedContigs(vector<bool>& marked,
		const vector<string>& pathIDs, const ContigPaths& paths)
{
	// Position of the current path, advanced in step with the iterator.
	size_t pathIndex = 0;
	for (ContigPaths::const_iterator path = paths.begin();
			path != paths.end(); ++path, ++pathIndex) {
		if (!path->empty())
			continue;
		size_t contig = get(g_contigNames, pathIDs[pathIndex]);
		assert(contig < marked.size());
		marked[contig] = true;
	}
}
/**
 * Collect the contigs that occur more than once within a single
 * solution.
 *
 * Each solution is tallied independently. The seed is counted once
 * for every solution before the solution's own nodes are counted,
 * so a solution that contains the seed reports it as a repeat.
 * Nodes are tallied by their contigIndex().
 *
 * @param seed      the contig counted once up front in each solution
 * @param solutions the candidate paths to examine
 * @return the union, over all solutions, of the contigs tallied more
 *         than once in that solution
 */
static set<ContigID> findRepeats(ContigID seed,
		const ContigPaths& solutions)
{
	typedef map<ContigID, unsigned> Tally;

	set<ContigID> repeated;
	for (ContigPaths::const_iterator solution = solutions.begin();
			solution != solutions.end(); ++solution) {
		// Start a fresh tally for this solution, seed included.
		Tally tally;
		++tally[seed];
		for (ContigPath::const_iterator node = solution->begin();
				node != solution->end(); ++node)
			++tally[node->contigIndex()];

		for (Tally::const_iterator entry = tally.begin();
				entry != tally.end(); ++entry) {
			if (entry->second <= 1)
				continue;
			repeated.insert(entry->first);
		}
	}
	return repeated;
}
/**
 * Build a single path that agrees with all the given paths.
 *
 * The result consists of the longest prefix shared by every path,
 * followed, only when the paths also share a non-empty suffix, by
 * one node constructed as ContigNode(numN, 'N') and then the shared
 * suffix. The prefix and suffix never overlap: the suffix search is
 * limited to what remains of the shortest path after the prefix.
 * When no suffix is shared, only the common prefix is returned.
 *
 * numN is derived from the longest path (as ordered by
 * ComparePathLength): calculatePathLength() over that path with the
 * prefix and suffix lengths, plus the distance between the node
 * preceding the suffix (or the origin, if the suffix spans the whole
 * path) and the first node of the suffix, plus k-1.
 * NOTE(review): ContigNode(numN, 'N') presumably denotes a gap of
 * numN unknown bases -- confirm against ContigNode.
 *
 * @param g      the contig graph
 * @param origin the node from which the paths start
 * @param paths  the alternative paths; must not be empty
 * @return the merged path described above
 */
static ContigPath constructAmbiguousPath(const Graph &g,
		const ContigNode& origin, const ContigPaths& paths)
{
	typedef ContigPaths::const_iterator PathIt;

	assert(!paths.empty());

	// Determine the length of the shortest path.
	const ContigPath& reference = paths.front();
	size_t shortest = reference.size();
	for (PathIt path = paths.begin(); path != paths.end(); ++path) {
		if (path->size() < shortest)
			shortest = path->size();
	}

	// Extend the common prefix one node at a time.
	ContigPath prefix;
	size_t prefixLen = 0;
	while (prefixLen < shortest) {
		const ContigNode& candidate = reference[prefixLen];
		PathIt path = paths.begin();
		while (path != paths.end()) {
			if ((*path)[prefixLen] != candidate)
				break;
			++path;
		}
		// Stopping early means some path disagrees at this position.
		if (path != paths.end())
			break;
		prefix.push_back(candidate);
		++prefixLen;
	}

	// Extend the common suffix one node at a time, walking backwards.
	// The nodes are collected in reverse order.
	ContigPath reversedSuffix;
	size_t suffixLen = 0;
	const size_t suffixLimit = shortest - prefixLen;
	while (suffixLen < suffixLimit) {
		const ContigNode& candidate
			= reference[reference.size() - suffixLen - 1];
		PathIt path = paths.begin();
		while (path != paths.end()) {
			if ((*path)[path->size() - suffixLen - 1] != candidate)
				break;
			++path;
		}
		// Stopping early means some path disagrees at this position.
		if (path != paths.end())
			break;
		reversedSuffix.push_back(candidate);
		++suffixLen;
	}

	ContigPath merged;
	merged.reserve(prefix.size() + 1 + reversedSuffix.size());
	merged.insert(merged.end(), prefix.begin(), prefix.end());
	if (suffixLen == 0)
		return merged;

	const ContigPath& longest = *max_element(
			paths.begin(), paths.end(),
			ComparePathLength(g, origin));
	unsigned gapLength = calculatePathLength(g, origin,
			longest, prefixLen, suffixLen);

	// Account for the overlap on the right.
	int dist = gapLength + getDistance(g,
			suffixLen == longest.size() ? origin
			: *(longest.rbegin() + suffixLen),
			*(longest.rbegin() + suffixLen - 1));

	// Add k-1 because it is the convention.
	int numN = dist + opt::k - 1;
	assert(numN > 0);

	merged.push_back(ContigNode(numN, 'N'));
	merged.insert(merged.end(),
			reversedSuffix.rbegin(), reversedSuffix.rend());
	return merged;
}