Пример #1
0
/** Finds all contigs used in each path in paths, and
 * marks them as seen in the vector seen. */
static void seenContigs(vector<bool>& seen, const ContigPaths& paths)
{
	for (ContigPaths::const_iterator it = paths.begin();
			it != paths.end(); ++it)
		for (ContigPath::const_iterator itc = it->begin();
				itc != it->end(); ++itc)
			if (itc->id() < seen.size())
				seen[itc->id()] = true;
}
Пример #2
0
/** Mark contigs for removal. An empty path indicates that a contig
 * should be removed.
 */
static void markRemovedContigs(vector<bool>& marked,
		const vector<string>& pathIDs, const ContigPaths& paths)
{
	for (ContigPaths::const_iterator it = paths.begin();
			it != paths.end(); ++it) {
		if (it->empty()) {
			size_t i = get(g_contigNames,
					pathIDs[it - paths.begin()]);
			assert(i < marked.size());
			marked[i] = true;
		}
	}
}
Пример #3
0
/** Return the set of contigs that appear more than once in a single
 * solution.
 */
static set<ContigID> findRepeats(ContigID seed,
	const ContigPaths& solutions)
{
	set<ContigID> repeats;
	for (ContigPaths::const_iterator solIt = solutions.begin();
			solIt != solutions.end(); ++solIt) {
		map<ContigID, unsigned> count;
		count[seed]++;
		for (ContigPath::const_iterator it = solIt->begin();
				it != solIt->end(); ++it)
			count[it->contigIndex()]++;
		for (map<ContigID, unsigned>::const_iterator
				it = count.begin(); it != count.end(); ++it)
			if (it->second > 1)
				repeats.insert(it->first);
	}
	return repeats;
}
Пример #4
0
/** Return an ambiguous path that agrees with all the given paths. */
static ContigPath constructAmbiguousPath(const Graph &g,
		const ContigNode& origin, const ContigPaths& paths)
{
	assert(!paths.empty());

	// Find the size of the smallest path.
	const ContigPath& firstSol = paths.front();
	size_t min_len = firstSol.size();
	for (ContigPaths::const_iterator it = paths.begin() + 1;
			it != paths.end(); ++it)
		min_len = min(min_len, it->size());

	// Find the longest prefix.
	ContigPath vppath;
	size_t longestPrefix;
	bool commonPrefix = true;
	for (longestPrefix = 0;
			longestPrefix < min_len; longestPrefix++) {
		const ContigNode& common_path_node = firstSol[longestPrefix];
		for (ContigPaths::const_iterator solIter = paths.begin();
				solIter != paths.end(); ++solIter) {
			const ContigNode& pathnode = (*solIter)[longestPrefix];
			if (pathnode != common_path_node) {
				// Found the longest prefix.
				commonPrefix = false;
				break;
			}
		}
		if (!commonPrefix)
			break;
		vppath.push_back(common_path_node);
	}

	// Find the longest suffix.
	ContigPath vspath;
	size_t longestSuffix;
	bool commonSuffix = true;
	for (longestSuffix = 0;
			longestSuffix < min_len-longestPrefix; longestSuffix++) {
		const ContigNode& common_path_node
			= firstSol[firstSol.size()-longestSuffix-1];
		for (ContigPaths::const_iterator solIter = paths.begin();
				solIter != paths.end(); ++solIter) {
			const ContigNode& pathnode
				= (*solIter)[solIter->size()-longestSuffix-1];
			if (pathnode != common_path_node) {
				// Found the longest suffix.
				commonSuffix = false;
				break;
			}
		}
		if (!commonSuffix)
			break;
		vspath.push_back(common_path_node);
	}

	ContigPath out;
	out.reserve(vppath.size() + 1 + vspath.size());
	out.insert(out.end(), vppath.begin(), vppath.end());
	if (longestSuffix > 0) {
		const ContigPath& longestPath(
				*max_element(paths.begin(), paths.end(),
					ComparePathLength(g, origin)));
		unsigned length = calculatePathLength(g, origin, longestPath,
				longestPrefix, longestSuffix);

		// Account for the overlap on the right.
		int dist = length + getDistance(g,
				longestSuffix == longestPath.size() ? origin
				: *(longestPath.rbegin() + longestSuffix),
				*(longestPath.rbegin() + longestSuffix - 1));

		// Add k-1 because it is the convention.
		int numN = dist + opt::k - 1;
		assert(numN > 0);

		out.push_back(ContigNode(numN, 'N'));
		out.insert(out.end(), vspath.rbegin(), vspath.rend());
	}
	return out;
}