/** Align the specified pair of sequences. * @return the number of matches and size of the consensus */ static pair<unsigned, unsigned> alignPair( const string& seqa, const string& seqb) { NWAlignment alignment; unsigned matches = alignGlobal(seqa, seqb, alignment); if (opt::verbose > 2) #pragma omp critical(cerr) cerr << alignment; return make_pair(matches, alignment.size()); }
/* Resolve ambiguous region using pairwise alignment * (Needleman-Wunsch) ('solutions' contain exactly two paths, from a * source contig to a dest contig) */ static ContigPath alignPair(const Graph& g, const ContigPaths& solutions, ofstream& out) { assert(solutions.size() == 2); assert(solutions[0].size() > 1); assert(solutions[1].size() > 1); assert(solutions[0].front() == solutions[1].front()); assert(solutions[0].back() == solutions[1].back()); ContigPath fstSol(solutions[0].begin()+1, solutions[0].end()-1); ContigPath sndSol(solutions[1].begin()+1, solutions[1].end()-1); if (fstSol.empty() || sndSol.empty()) { // This entire sequence may be deleted. const ContigPath& sol(fstSol.empty() ? sndSol : fstSol); assert(!sol.empty()); Sequence consensus(mergePath(g, sol)); assert(consensus.size() > opt::k - 1); string::iterator first = consensus.begin() + opt::k - 1; transform(first, consensus.end(), first, ::tolower); unsigned match = opt::k - 1; float identity = (float)match / consensus.size(); if (opt::verbose > 2) cerr << consensus << '\n'; if (opt::verbose > 1) cerr << identity << (identity < opt::identity ? " (too low)\n" : "\n"); if (identity < opt::identity) return ContigPath(); unsigned coverage = calculatePathProperties(g, sol).coverage; ContigNode u = outputNewContig(g, solutions, 1, 1, consensus, coverage, out); ContigPath path; path.push_back(solutions.front().front()); path.push_back(u); path.push_back(solutions.front().back()); return path; } Sequence fstPathContig(mergePath(g, fstSol)); Sequence sndPathContig(mergePath(g, sndSol)); if (fstPathContig == sndPathContig) { // These two paths have identical sequence. if (fstSol.size() == sndSol.size()) { // A perfect match must be caused by palindrome. typedef ContigPath::const_iterator It; pair<It, It> it = mismatch( fstSol.begin(), fstSol.end(), sndSol.begin()); assert(it.first != fstSol.end()); assert(it.second != sndSol.end()); assert(*it.first == get(vertex_complement, g, *it.second)); assert(equal(it.first+1, It(fstSol.end()), it.second+1)); if (opt::verbose > 1) cerr << "Palindrome: " << get(vertex_contig_name, g, *it.first) << '\n'; return solutions[0]; } else { // The paths are different lengths. cerr << PROGRAM ": warning: " "Two paths have identical sequence, which may be " "caused by a transitive edge in the overlap graph.\n" << '\t' << fstSol << '\n' << '\t' << sndSol << '\n'; return solutions[fstSol.size() > sndSol.size() ? 0 : 1]; } } unsigned minLength = min( fstPathContig.length(), sndPathContig.length()); unsigned maxLength = max( fstPathContig.length(), sndPathContig.length()); float lengthRatio = (float)minLength / maxLength; if (lengthRatio < opt::identity) { if (opt::verbose > 1) cerr << minLength << '\t' << maxLength << '\t' << lengthRatio << "\t(different length)\n"; return ContigPath(); } NWAlignment align; unsigned match = alignGlobal(fstPathContig, sndPathContig, align); float identity = (float)match / align.size(); if (opt::verbose > 2) cerr << align; if (opt::verbose > 1) cerr << identity << (identity < opt::identity ? " (too low)\n" : "\n"); if (identity < opt::identity) return ContigPath(); unsigned coverage = calculatePathProperties(g, fstSol).coverage + calculatePathProperties(g, sndSol).coverage; ContigNode u = outputNewContig(g, solutions, 1, 1, align.consensus(), coverage, out); ContigPath path; path.push_back(solutions.front().front()); path.push_back(u); path.push_back(solutions.front().back()); return path; }