// Translate the SeqCoord c from the frame of coord[0] to coord[1] SeqCoord Match::translate(const SeqCoord& c) const { // In overlap with indelsm the coord[] are not the same length // assert(coord[0].length() == coord[1].length()); // ensure translation is valid SeqCoord out; out.seqlen = coord[1].seqlen; //The offset of start and end should be adjusted according to the indels. out.interval.start = c.interval.start + calculateTranslation(); out.interval.end = c.interval.end + calculateTranslationEnd(); if(isRC()) out.flip(); // the offset t is not accurate under indels overlap // if(out.interval.end >= out.seqlen) // out.interval.end = out.seqlen-1; // if(out.interval.start <0) // out.interval.start = 0; // assert(out.interval.start>=0 && out.interval.start<out.interval.end); return out; }
// Attempt to resolve a predicted overlap between s1 and s2 // Returns true if there overlap was found and the overhang of s2 is placed in outString bool ScaffoldRecord::overlapResolve(const ResolveParams& params, const std::string& s1, const std::string& s2, const ScaffoldLink& link, std::string& outString) const { // Attempt to find an overlap between these sequences int expectedOverlap = -1 * link.distance; // If the maximum overlap was not set, set it to the expected overlap * 3 stddev int upperBound = 0; if(params.maxOverlap == -1) upperBound = static_cast<int>(expectedOverlap + 3.0f * link.stdDev); else upperBound = params.maxOverlap; // Calculate the best match Match match; bool overlapFound = OverlapTools::boundedOverlapDP(s1, s2, params.minOverlap, upperBound, params.maxErrorRate, match); if(overlapFound) { SeqCoord overlapCoord = match.coord[1]; SeqCoord overhangCoord = overlapCoord.complement(); outString = overhangCoord.getSubstring(s2); return true; } else { return false; } }
// Return a seqcoord representing the complement of the interval // For example if the seqcoord represents the matched portion of a string, // this returns a seqcoord of the unmatched portion SeqCoord SeqCoord::complement() const { SeqCoord out; out.seqlen = seqlen; if(isFull()) { out.setEmpty(); } else if(isEmpty()) { out.setFull(); } else if(isLeftExtreme()) { out.interval.start = std::max(interval.start, interval.end) + 1; out.interval.end = out.seqlen - 1; } else { assert(isRightExtreme()); out.interval.start = 0; out.interval.end = std::min(interval.start, interval.end) - 1; } assert(out.isValid()); return out; }
// Calculation the translation offset to shift // a coord[1] position to a coord[0]. This must be calculated // using canonical coordinates int Match::calculateInverseTranslation() const { if(!isRC()) return coord[0].interval.start - coord[1].interval.start; else { SeqCoord f = coord[0]; f.flip(); return f.interval.start - coord[1].interval.start; } }
int Match::calculateTranslationEnd() const { if(!isRC()) return coord[1].interval.end - coord[0].interval.end; else { SeqCoord f = coord[1]; f.flip(); return f.interval.end - coord[0].interval.end; } }
// Get the edge's label std::string Edge::getLabel() const { const Edge* pTwin = getTwin(); const Vertex* pEndpoint = m_pEnd; // get the unmatched coordinates in V2 SeqCoord unmatched = pTwin->getMatchCoord().complement(); std::string seq = unmatched.getSubstring(pEndpoint->getStr()); if(getComp() == EC_REVERSE) seq = reverseComplement(seq); return seq; }
// Translate the SeqCoord c from the frame of coord[1] to coord[0] SeqCoord Match::inverseTranslate(const SeqCoord& c) const { // assert(c.isExtreme()); SeqCoord out; out.seqlen = coord[0].seqlen; //seqlen was extended out.interval.start = c.interval.start + calculateInverseTranslation(); out.interval.end = c.interval.end + calculateInverseTranslationEnd(); if(isRC()) out.flip(); // if((int)c.length() !=(int)out.length()) // std::cout << c.length() << "\t"<< out.length() <<"\n"; // assert(out.interval.start>=0 && out.interval.start<out.interval.end); return out; }
// Attempt to resolve a predicted overlap between s1 and s2 // Returns true if there overlap was found and the overhang of s2 is placed in outString bool ScaffoldRecord::overlapResolve(const ResolveParams& params, const std::string& s1, const std::string& s2, const ScaffoldLink& link, std::string& outString) const { // Attempt to find an overlap between these sequences int expectedOverlap = -1 * link.distance; #ifdef DEBUGRESOLVE std::cout << "Attempting overlap resolve of link to " << link.endpointID << " expected distance: " << link.distance << " orientation: " << link.edgeData.getComp() << "\n"; #endif // If the maximum overlap was not set, set it to the expected overlap * 3 stddev int upperBound = 0; if(params.maxOverlap == -1) upperBound = static_cast<int>(expectedOverlap + 3.0f * link.stdDev); else upperBound = params.maxOverlap; // Calculate the best match Match match; bool overlapFound = OverlapTools::boundedOverlapDP(s1, s2, params.minOverlap, upperBound, params.maxErrorRate, match); if(overlapFound) { #ifdef DEBUGRESOLVE std::cout << "Overlap found, length: " << match.coord[1].length() << "\n"; #endif SeqCoord overlapCoord = match.coord[1]; SeqCoord overhangCoord = overlapCoord.complement(); outString = overhangCoord.getSubstring(s2); return true; } else { return false; } }
// Get the substring of the full path string starting from position fromX
// to position toY on the first and last vertices, respectively.
// dirX is the direction along contig X towards vertex Y, and vice versa for dirY.
// Returns an empty string if the requested coordinates on X or on Y
// do not form a valid interval.
std::string SGWalk::getFragmentString(const Vertex* pX, const Vertex* pY, int fromX, int toY, EdgeDir dirX, EdgeDir dirY) const
{
    std::string out;

    // Calculate the portion of X that we should include in the string
    // If dirX is SENSE, we take everything from position fromX to the end of X,
    // otherwise we take everything up to and including fromX
    SeqCoord xCoord(0,0,pX->getSeqLen());
    if(dirX == ED_SENSE)
    {
        xCoord.interval.start = fromX;
        xCoord.interval.end = pX->getSeqLen() - 1;
    }
    else
    {
        xCoord.interval.start = 0;
        xCoord.interval.end = fromX;
    }

    // Handle the trivial case where pX == pY and the walk is found immediately:
    // the same interval is additionally bounded by toY on the other side
    if(m_edges.empty() && pX == pY)
    {
        if(dirY == ED_SENSE)
        {
            xCoord.interval.start = toY;
        }
        else
        {
            xCoord.interval.end = toY;
        }
    }

    if(!xCoord.isValid())
        return "";

    // Seed the output with the selected portion of the start vertex.
    // NOTE(review): the sequence is read from m_pStartVertex rather than pX;
    // presumably callers pass the walk's start vertex as pX -- confirm.
    out.append(m_pStartVertex->getSeq().substr(xCoord.interval.start, xCoord.length()));

    // Number of edges to process; the final edge is handled specially below
    size_t stop = m_edges.size();

    // The first edge is always in correct frame of reference
    // so the comp is EC_SAME. This variable tracks whether the
    // string that is being added is in a different orientation from the
    // starting sequence and needs to be flipped
    EdgeComp currComp = EC_SAME;

    // If the walk direction is antisense, we reverse every component and then
    // reverse the entire string to generate the final string
    bool reverseAll = !m_edges.empty() && m_edges[0]->getDir() == ED_ANTISENSE;
    if(reverseAll)
        out = reverse(out);

    for(size_t i = 0; i < stop; ++i)
    {
        Edge* pYZ = m_edges[i];
        bool isLast = i == (stop - 1);
        if(!isLast)
        {
            // Intermediate edge: append the whole edge label, reverse-complemented
            // if the accumulated orientation is EC_REVERSE
            std::string edge_str = pYZ->getLabel();
            assert(edge_str.size() != 0);
            if(currComp == EC_REVERSE)
                edge_str = reverseComplement(edge_str);
            if(reverseAll)
                edge_str = reverse(edge_str);
            out.append(edge_str);
        }
        else
        {
            // Final edge: only part of the last vertex is appended
            const Edge* pZY = pYZ->getTwin();

            // get the unmatched coordinates on pY
            SeqCoord unmatched = pZY->getMatchCoord().complement();

            // Now, we have to shrink the unmatched interval on Y to
            // only include up to toY
            if(dirY == ED_SENSE)
                unmatched.interval.start = toY;
            else
                unmatched.interval.end = toY;

            if(!unmatched.isValid())
                return "";

            std::string seq = unmatched.getSubstring(pY->getStr());

            // Reverse-complement when this edge's comp differs from the
            // accumulated comp
            if(pYZ->getComp() != currComp)
                seq = reverseComplement(seq);
            if(reverseAll)
                seq = reverse(seq);
            out.append(seq);
        }

        // Calculate the next comp, between X and Z: an EC_SAME edge keeps the
        // accumulated orientation, any other edge toggles it
        EdgeComp ecYZ = pYZ->getComp();
        EdgeComp ecXZ;
        if(ecYZ == EC_SAME)
            ecXZ = currComp;
        else
            ecXZ = !currComp;
        currComp = ecXZ;
    }

    // Undo the per-component reversal to produce the final string
    if(reverseAll)
        out = reverse(out);
    return out;
}
// Return the length of the sequence size_t Edge::getSeqLen() const { SeqCoord unmatched = m_pTwin->getMatchCoord().complement(); return unmatched.length(); }
std::string SeqCoord::getComplementString(const std::string& str) const { SeqCoord comp = complement(); return comp.getSubstring(str); }