Пример #1
0
/**
 * Write a FASTA comment describing the specified path to a stream.
 * The comment is a comma-separated list of vertex names: the first
 * vertex, then either the middle vertex (when the path has exactly
 * three vertices) or "..." (when it has more than three), and
 * finally the last vertex.
 * @param out the stream that receives the comment
 * @param g the graph used to look up the vertex names
 * @param path the path to describe; must have at least two vertices
 */
static void pathToComment(ostream& out,
		const Graph& g, const ContigPath& path)
{
	const size_t numVertices = path.size();
	assert(numVertices > 1);

	out << get(vertex_name, g, path.front());
	if (numVertices > 3) {
		// Too many interior vertices to list: abbreviate them.
		out << ",...";
	} else if (numVertices == 3) {
		// Exactly one interior vertex: name it.
		out << ',' << get(vertex_name, g, path[1]);
	}
	out << ',' << get(vertex_name, g, path.back());
}
Пример #2
0
/** Return the length of the specified path in k-mer. */
static unsigned calculatePathLength(const Graph& g,
		const ContigNode& origin,
		const ContigPath& path, size_t prefix = 0, size_t suffix = 0)
{
	if (prefix + suffix == path.size())
		return 0;
	assert(prefix + suffix < path.size());
	int length = addProp(g, path.begin() + prefix,
			path.end() - suffix).length;

	// Account for the overlap on the left.
	vertex_descriptor u = prefix == 0 ? origin : path[prefix - 1];
	length += getDistance(g, u, path[prefix]);
	assert(length > 0);
	return length;
}
Пример #3
0
/**
 * Trim the path down to the range [first, last).
 * The contigs outside that range are passed to
 * recordTrimmedContigs() and erased. When the range is empty
 * (first >= last) the whole path is recorded and cleared.
 * removeAmbiguousContigs() is then applied to whatever is left.
 * @param path the path to trim, modified in place
 * @param first the index of the first contig to keep
 * @param last one past the index of the last contig to keep
 */
static void removeContigs(ContigPath& path,
		unsigned first, unsigned last)
{
	assert(first <= path.size());
	assert(last <= path.size());

	const bool keepNothing = first >= last;
	if (keepNothing) {
		recordTrimmedContigs(path.begin(), path.end());
		path.clear();
	} else {
		// Record both trimmed ends before erasing anything.
		recordTrimmedContigs(path.begin(), path.begin() + first);
		recordTrimmedContigs(path.begin() + last, path.end());
		// Erase the tail first so that the index `first` still
		// refers to the same element when the head is erased.
		path.erase(path.begin() + last, path.end());
		path.erase(path.begin(), path.begin() + first);
	}
	removeAmbiguousContigs(path);
}
Пример #4
0
/**
 * Check whether path starts with the sequence [first, last).
 * @param path the path to test; taken by value because it is
 *        reverse complemented locally when rc is true
 * @param rc whether to reverse complement path before comparing
 * @param first the beginning of the sequence to look for
 * @param last the end of the sequence to look for
 * @return true if the sequence is no longer than the path and
 *         matches its leading elements
 */
static bool startsWith(ContigPath path, bool rc,
		ContigPath::const_iterator first,
		ContigPath::const_iterator last)
{
	if (rc)
		reverseComplement(path.begin(), path.end());
	assert(*first == path.front());
	assert(first < last);

	// A sequence longer than the path cannot be a prefix of it.
	if (unsigned(last - first) > path.size())
		return false;
	return equal(first, last, path.begin());
}
Пример #5
0
/**
 * Add distances to a path.
 * Copies the path and, between every pair of consecutive vertices
 * whose distance in the graph is non-negative, inserts an ambiguous
 * 'N' node. The size of that node is the distance plus opt::k - 1,
 * but never less than 1.
 * @param g the graph used to look up distances
 * @param path the path to annotate; may be empty
 * @return the annotated copy of the path (empty if path is empty)
 */
static ContigPath addDistance(const Graph& g, const ContigPath& path)
{
	ContigPath out;
	// front() and begin() + 1 are undefined on an empty path.
	if (path.empty())
		return out;

	out.reserve(path.size());
	ContigNode u = path.front();
	out.push_back(u);
	for (ContigPath::const_iterator it = path.begin() + 1;
			it != path.end(); ++it) {
		ContigNode v = *it;
		int distance = getDistance(g, u, v);
		if (distance >= 0) {
			int numN = distance + opt::k - 1; // by convention
			assert(numN >= 0);
			// Emit at least one N so that the gap remains visible.
			numN = max(numN, 1);
			out.push_back(ContigNode(numN, 'N'));
		}
		out.push_back(v);
		u = v;
	}
	return out;
}
Пример #6
0
/**
 * Thread entry point: read estimate records from the shared input
 * stream until it is exhausted, build a path for each record, and
 * write every path of more than one node to the shared output
 * stream.
 * @param pArg pointer to a WorkerArg providing the input stream,
 *        the output stream and the graph
 * @return always NULL
 */
static void* worker(void* pArg)
{
	WorkerArg& arg = *static_cast<WorkerArg*>(pArg);
	for (;;) {
		/** Lock the input stream. */
		static pthread_mutex_t inMutex = PTHREAD_MUTEX_INITIALIZER;
		pthread_mutex_lock(&inMutex);
		EstimateRecord er;
		// The stream-to-bool conversion is explicit since C++11, so
		// request it explicitly; this also compiles as C++98.
		bool good = static_cast<bool>((*arg.in) >> er);
		pthread_mutex_unlock(&inMutex);
		if (!good)
			break;

		// Flip the anterior distance estimates.
		for (Estimates::iterator it = er.estimates[1].begin();
				it != er.estimates[1].end(); ++it)
			it->first ^= 1;

		// Build the path in two passes around the reference contig:
		// the first pass is reverse complemented before the
		// reference contig is appended, and the second pass then
		// continues from there.
		ContigPath path;
		handleEstimate(*arg.graph, er, true, path);
		reverseComplement(path.begin(), path.end());
		path.push_back(ContigNode(er.refID, false));
		handleEstimate(*arg.graph, er, false, path);
		if (path.size() > 1) {
			/** Lock the output stream. */
			static pthread_mutex_t outMutex
				= PTHREAD_MUTEX_INITIALIZER;
			pthread_mutex_lock(&outMutex);
			*arg.out << get(g_contigNames, er.refID)
				<< '\t' << path << '\n';
			assert(arg.out->good());
			pthread_mutex_unlock(&outMutex);
		}
	}
	return NULL;
}
Пример #7
0
/**
 * Return an ambiguous path that agrees with all the given paths.
 * The result is the longest prefix shared by every path, followed,
 * when the paths also share a non-empty suffix, by a single 'N'
 * node and that shared suffix. The size of the 'N' node is derived
 * from the longest of the given paths, as ordered by
 * ComparePathLength.
 * @param g the graph
 * @param origin the vertex preceding the paths
 * @param paths the alternative paths; must not be empty
 * @return the ambiguous path
 */
static ContigPath constructAmbiguousPath(const Graph &g,
		const ContigNode& origin, const ContigPaths& paths)
{
	assert(!paths.empty());
	const ContigPath& reference = paths.front();

	// Only positions that exist in every path can be compared, so
	// the scans below are bounded by the size of the shortest path.
	size_t shortest = reference.size();
	for (ContigPaths::const_iterator p = paths.begin() + 1;
			p != paths.end(); ++p) {
		if (p->size() < shortest)
			shortest = p->size();
	}

	// Count the leading positions at which all paths agree.
	size_t longestPrefix = 0;
	while (longestPrefix < shortest) {
		const ContigNode& expected = reference[longestPrefix];
		bool unanimous = true;
		for (ContigPaths::const_iterator p = paths.begin();
				unanimous && p != paths.end(); ++p) {
			if ((*p)[longestPrefix] != expected)
				unanimous = false;
		}
		if (!unanimous)
			break;
		++longestPrefix;
	}

	// Count the trailing positions at which all paths agree,
	// without overlapping the prefix within the shortest path.
	size_t longestSuffix = 0;
	while (longestSuffix < shortest - longestPrefix) {
		const ContigNode& expected
			= reference[reference.size() - longestSuffix - 1];
		bool unanimous = true;
		for (ContigPaths::const_iterator p = paths.begin();
				unanimous && p != paths.end(); ++p) {
			if ((*p)[p->size() - longestSuffix - 1] != expected)
				unanimous = false;
		}
		if (!unanimous)
			break;
		++longestSuffix;
	}

	// The shared prefix and suffix are taken from the first path.
	ContigPath out;
	out.reserve(longestPrefix + 1 + longestSuffix);
	out.insert(out.end(),
			reference.begin(), reference.begin() + longestPrefix);
	if (longestSuffix > 0) {
		const ContigPath& longestPath = *max_element(
				paths.begin(), paths.end(),
				ComparePathLength(g, origin));
		unsigned length = calculatePathLength(g, origin, longestPath,
				longestPrefix, longestSuffix);

		// Account for the overlap on the right: the distance from
		// the vertex before the suffix (or the origin, if the suffix
		// spans the whole path) to the first vertex of the suffix.
		const size_t suffixStart = longestPath.size() - longestSuffix;
		int dist = length + getDistance(g,
				suffixStart == 0 ? origin
				: longestPath[suffixStart - 1],
				longestPath[suffixStart]);

		// Add k-1 because it is the convention.
		int numN = dist + opt::k - 1;
		assert(numN > 0);

		out.push_back(ContigNode(numN, 'N'));
		out.insert(out.end(),
				reference.end() - longestSuffix, reference.end());
	}
	return out;
}
Пример #8
0
	/**
	 * Order paths by their length as computed by
	 * calculatePathLength(), breaking ties by the number of nodes.
	 * @return true if a orders before b
	 */
	bool operator()(const ContigPath& a, const ContigPath& b) const {
		const unsigned lenA = calculatePathLength(m_g, m_origin, a);
		const unsigned lenB = calculatePathLength(m_g, m_origin, b);
		if (lenA != lenB)
			return lenA < lenB;
		// Equal lengths: the path with fewer nodes orders first.
		return a.size() < b.size();
	}
Пример #9
0
/**
 * Return true if both paths are equal, ignoring ambiguous nodes.
 * The paths must have the same size and every pair of corresponding
 * nodes must satisfy equalOrBothAmbiguos().
 */
static bool equalIgnoreAmbiguos(const ContigPath& a,
		const ContigPath& b)
{
	// Paths of different sizes can never match element for element.
	if (a.size() != b.size())
		return false;
	return equal(a.begin(), a.end(), b.begin(), equalOrBothAmbiguos);
}
Пример #10
0
/** Find an equivalent region of the two specified paths.
 * Identical paths, and the case where path2 occurs as a contiguous
 * run inside path1, are resolved directly and yield path1.
 * Otherwise the paths are aligned around a pivot node present in
 * both, trying each occurrence of the pivot in path1 in turn.
 * @param lengths passed through to the iterator-based align overload
 * @param path1 the first path
 * @param path2 the second path; may be the same object as path1
 * @param pivot the preferred seed; replaced by the result of
 *        findPivot() if it is not present in both paths
 * @param[out] orientation the orientation of the alignment; not
 *        assigned by this function when no pivot can be found
 * @return the consensus sequence, or an empty path if no pivot or
 *         no alignment was found
 */
static ContigPath align(const Lengths& lengths,
		const ContigPath& path1, const ContigPath& path2,
		ContigNode pivot, dir_type& orientation)
{
	if (&path1 == &path2) {
		// Ignore the trivial alignment when aligning a path to
		// itself.
	} else if (path1 == path2) {
		// These two paths are identical.
		orientation = DIR_B;
		return path1;
	} else {
		ContigPath::const_iterator it
			= search(path1.begin(), path1.end(),
				path2.begin(), path2.end());
		if (it != path1.end()) {
			// path2 is subsumed in path1.
			// Determine the orientation of the edge.
			orientation
				= it == path1.begin() ? DIR_R
				: it + path2.size() == path1.end() ? DIR_F
				: DIR_B;
			return path1;
		}
	}

	// Find a suitable pivot.
	if (find(path1.begin(), path1.end(), pivot) == path1.end()
			|| find(path2.begin(), path2.end(), pivot)
				== path2.end()) {
		bool good;
		tie(pivot, good) = findPivot(path1, path2);
		if (!good)
			return ContigPath();
	}
	assert(find(path1.begin(), path1.end(), pivot) != path1.end());

	ContigPath::const_iterator it2 = find(path2.begin(), path2.end(),
			pivot);
	assert(it2 != path2.end());
	if (&path1 != &path2) {
		// The seed must be unique in path2, unless we're aligning a
		// path to itself.
		assert(count(it2+1, path2.end(), pivot) == 0);
	}

	// Try every occurrence of the pivot in path1. Plain find() is
	// used rather than find_if() with bind2nd(), which was removed
	// from the standard library in C++17; both compare with ==.
	ContigPath consensus;
	for (ContigPath::const_iterator it1
				= find(path1.begin(), path1.end(), pivot);
			it1 != path1.end();
			it1 = find(it1+1, path1.end(), pivot)) {
		if (&*it1 == &*it2) {
			// We are aligning a path to itself, and this is the
			// trivial alignment, which we'll ignore.
			continue;
		}
		consensus = align(lengths,
				path1, path2, it1, it2, orientation);
		if (!consensus.empty())
			return consensus;
	}
	return consensus;
}