// Map the SeqCoord c from the coordinate frame of coord[0] into the frame
// of coord[1].
// The start and the end are shifted by separately computed offsets: when the
// overlap contains indels, coord[0] and coord[1] need not have the same
// length, so a single offset is not valid for both ends.
// The result is neither clamped to the bounds of the target sequence nor
// validated here; callers must cope with out-of-range coordinates.
SeqCoord Match::translate(const SeqCoord& c) const
{
    SeqCoord translated;

    // The translated coordinate lives on the second sequence
    translated.seqlen = coord[1].seqlen;

    // Shift each endpoint by its own indel-aware offset
    translated.interval.start = c.interval.start + calculateTranslation();
    translated.interval.end = c.interval.end + calculateTranslationEnd();

    // For reverse-complement matches the translated coordinate is flipped
    if(isRC())
    {
        translated.flip();
    }

    return translated;
}
Exemple #2
0
// Try to resolve a predicted overlap between s1 and s2.
// On success, the portion of s2 that is NOT part of the overlap (its overhang)
// is written to outString and true is returned. When no overlap is found,
// outString is left untouched and false is returned.
bool ScaffoldRecord::overlapResolve(const ResolveParams& params, const std::string& s1, const std::string& s2, 
                                    const ScaffoldLink& link, std::string& outString) const
{
    // The expected overlap length is the negated link distance
    const int predictedOverlap = -link.distance;

    // Use the configured maximum overlap; a value of -1 means "not set", in
    // which case fall back to the expected overlap plus 3 standard deviations
    int upperBound = params.maxOverlap;
    if(params.maxOverlap == -1)
        upperBound = static_cast<int>(predictedOverlap + 3.0f * link.stdDev);

    // Search for the best bounded overlap
    Match match;
    if(!OverlapTools::boundedOverlapDP(s1, s2, params.minOverlap, upperBound, params.maxErrorRate, match))
        return false;

    // coord[1] is the matched region on s2; its complement is the overhang
    outString = match.coord[1].complement().getSubstring(s2);
    return true;
}
Exemple #3
0
// Build the SeqCoord covering everything this interval does not cover.
// For instance, if this coordinate describes the matched part of a string,
// the result describes the unmatched part.
// A full interval yields an empty one and vice versa. Otherwise the interval
// must touch one end of the sequence (asserted), and the result is the
// remaining stretch reaching to the opposite end.
SeqCoord SeqCoord::complement() const
{
    SeqCoord result;
    result.seqlen = seqlen;

    if(isFull())
        result.setEmpty();
    else if(isEmpty())
        result.setFull();
    else if(!isLeftExtreme())
    {
        // Interval is anchored at the right end: the complement is the prefix
        // that stops just before the smaller of the two endpoints
        assert(isRightExtreme());
        result.interval.start = 0;
        result.interval.end = std::min(interval.end, interval.start) - 1;
    }
    else
    {
        // Interval is anchored at the left end: the complement is the suffix
        // that begins just after the larger of the two endpoints
        result.interval.start = 1 + std::max(interval.end, interval.start);
        result.interval.end = result.seqlen - 1;
    }

    assert(result.isValid());
    return result;
}
// Calculation the translation offset to shift
// a coord[1] position to a coord[0]. This must be calculated
// using canonical coordinates
int Match::calculateInverseTranslation() const
{
    if(!isRC())
        return coord[0].interval.start - coord[1].interval.start;
    else
    {
        SeqCoord f = coord[0];
        f.flip();
        return f.interval.start - coord[1].interval.start;
    }
}
int Match::calculateTranslationEnd() const
{
    if(!isRC())
        return coord[1].interval.end - coord[0].interval.end;
    else
    {
        SeqCoord f = coord[1];
        f.flip();
        return f.interval.end - coord[0].interval.end;
    }
}
// Get the edge's label
std::string Edge::getLabel() const
{
    const Edge* pTwin = getTwin();
    const Vertex* pEndpoint = m_pEnd;
    
    // get the unmatched coordinates in V2
    SeqCoord unmatched = pTwin->getMatchCoord().complement();
    std::string seq = unmatched.getSubstring(pEndpoint->getStr());

    if(getComp() == EC_REVERSE)
        seq = reverseComplement(seq);

    return seq;
}
// Map the SeqCoord c from the coordinate frame of coord[1] back into the
// frame of coord[0].
// The start and the end are shifted by separately computed offsets.
// The result is not validated or clamped here.
SeqCoord Match::inverseTranslate(const SeqCoord& c) const
{
    SeqCoord translated;

    // The translated coordinate lives on the first sequence
    // (original note: seqlen was extended)
    translated.seqlen = coord[0].seqlen;

    // Shift each endpoint by its own offset
    translated.interval.start = c.interval.start + calculateInverseTranslation();
    translated.interval.end = c.interval.end + calculateInverseTranslationEnd();

    // For reverse-complement matches the translated coordinate is flipped
    if(isRC())
    {
        translated.flip();
    }

    return translated;
}
Exemple #8
0
// Try to resolve a predicted overlap between s1 and s2.
// On success, the portion of s2 that is NOT part of the overlap (its overhang)
// is written to outString and true is returned. When no overlap is found,
// outString is left untouched and false is returned.
// Diagnostic output is printed when DEBUGRESOLVE is defined.
bool ScaffoldRecord::overlapResolve(const ResolveParams& params, const std::string& s1, const std::string& s2, 
                                    const ScaffoldLink& link, std::string& outString) const
{
    // The expected overlap length is the negated link distance
    const int predictedOverlap = -link.distance;

#ifdef DEBUGRESOLVE
    std::cout << "Attempting overlap resolve of link to " << link.endpointID << " expected distance: " << link.distance << " orientation: " << link.edgeData.getComp() << "\n";
#endif

    // Use the configured maximum overlap; a value of -1 means "not set", in
    // which case fall back to the expected overlap plus 3 standard deviations
    int upperBound = params.maxOverlap;
    if(params.maxOverlap == -1)
        upperBound = static_cast<int>(predictedOverlap + 3.0f * link.stdDev);

    // Search for the best bounded overlap
    Match match;
    if(!OverlapTools::boundedOverlapDP(s1, s2, params.minOverlap, upperBound, params.maxErrorRate, match))
        return false;

#ifdef DEBUGRESOLVE
    std::cout << "Overlap found, length: " << match.coord[1].length() << "\n";
#endif

    // coord[1] is the matched region on s2; its complement is the overhang
    outString = match.coord[1].complement().getSubstring(s2);
    return true;
}
Exemple #9
0
// Get the substring of the full path string starting from position fromX
// to position toY on the first and last vertices, respectively.
// dirX is the direction along contig X towards vertex Y, and vice versa for dirY.
// Returns the empty string when the requested coordinates on X, or the
// trimmed unmatched interval on Y, do not form a valid SeqCoord.
std::string SGWalk::getFragmentString(const Vertex* pX, const Vertex* pY,
                                      int fromX, int toY, 
                                      EdgeDir dirX, EdgeDir dirY) const
{
    std::string out;

    // Calculate the portion of X that we should include in the string.
    // If dirX is SENSE, we take everything from position fromX to the end of X,
    // otherwise we take everything up to and including fromX
    SeqCoord xCoord(0,0,pX->getSeqLen());

    if(dirX == ED_SENSE)
    {
        xCoord.interval.start = fromX;
        xCoord.interval.end = pX->getSeqLen() - 1;
    }
    else
    {
        xCoord.interval.start = 0;
        xCoord.interval.end = fromX;
    }

    // Handle the trivial case where pX == pY and the walk is found immediately.
    // There are no edges, so toY overrides one end of the interval on X.
    if(m_edges.empty() && pX == pY)
    {
        if(dirY == ED_SENSE)
        {
            xCoord.interval.start = toY;
        }
        else
        {
            xCoord.interval.end = toY;
        }
    }

    // An invalid interval on X means there is no fragment to report
    if(!xCoord.isValid())
        return "";

    // Seed the output with the selected portion of the first vertex.
    // NOTE(review): the sequence is read from m_pStartVertex rather than pX;
    // presumably callers always pass pX == m_pStartVertex -- confirm.
    out.append(m_pStartVertex->getSeq().substr(xCoord.interval.start, xCoord.length()));

    // Number of edges to process; the final edge is handled specially in
    // the loop below because only part of its vertex is included
    size_t stop = m_edges.size();

    // The first edge is always in correct frame of reference 
    // so the comp is EC_SAME. This variable tracks where the 
    // string that is being added is different from the starting sequence
    // and needs to be flipped
    EdgeComp currComp = EC_SAME;

    // If the walk direction is antisense, we reverse every component and then
    // reverse the entire string to generate the final string
    bool reverseAll = !m_edges.empty() && m_edges[0]->getDir() == ED_ANTISENSE;
    if(reverseAll)
        out = reverse(out);

    for(size_t i = 0; i < stop; ++i)
    {
        Edge* pYZ = m_edges[i];
        // stop is at least 1 inside the loop, so stop - 1 cannot wrap around
        bool isLast = i == (stop - 1);

        if(!isLast)
        {
            // Interior edge: append the whole edge label, reverse-complemented
            // when the accumulated comp is EC_REVERSE
            std::string edge_str = pYZ->getLabel();
            assert(edge_str.size() != 0);
            if(currComp == EC_REVERSE)
                edge_str = reverseComplement(edge_str);

            if(reverseAll)
                edge_str = reverse(edge_str);
            out.append(edge_str);
        }
        else
        {
            // Last edge: only the part of the unmatched region bounded by toY
            // is appended
            const Edge* pZY = pYZ->getTwin();
            
            // get the unmatched coordinates on pY
            SeqCoord unmatched = pZY->getMatchCoord().complement();

            // Now, we have to shrink the unmatched interval on Y to
            // only include up to toY
            if(dirY == ED_SENSE)
                unmatched.interval.start = toY;
            else
                unmatched.interval.end = toY;

            // toY may fall outside the unmatched region, leaving nothing valid
            if(!unmatched.isValid())
                return "";

            // Flip the sequence when this edge's comp differs from the
            // accumulated comp
            std::string seq = unmatched.getSubstring(pY->getStr());
            if(pYZ->getComp() != currComp)
                seq = reverseComplement(seq);
            
            if(reverseAll)
                seq = reverse(seq);
            out.append(seq);
        }

        // Calculate the next comp, between X and Z: an EC_SAME edge keeps the
        // current comp, any other edge toggles it
        EdgeComp ecYZ = pYZ->getComp();
        EdgeComp ecXZ;
        if(ecYZ == EC_SAME)
            ecXZ = currComp;
        else
            ecXZ = !currComp;

        currComp = ecXZ;
    }

    // Every component was reversed before appending; reversing the whole
    // string produces the final string for an antisense walk
    if(reverseAll)
        out = reverse(out);
    return out;
}
// Return the length of the sequence
size_t Edge::getSeqLen() const
{
    SeqCoord unmatched = m_pTwin->getMatchCoord().complement();
    return unmatched.length();
}
Exemple #11
0
std::string SeqCoord::getComplementString(const std::string& str) const
{
    SeqCoord comp = complement();
    return comp.getSubstring(str);
}