Example #1
0
// Build the string spelled out by this walk.
//
// For SGWT_START_TO_END and SGWT_INTERNAL the string begins with the full
// sequence of the start vertex; for every other walk type only the edge
// labels are concatenated. For SGWT_INTERNAL the result is additionally
// trimmed at both ends (see below). A diagnostic line is written to stdout
// when the resulting string is empty.
std::string SGWalk::getString(SGWalkType type) const
{
    // When the first edge is antisense the string is assembled mirrored:
    // every piece is reversed as it is added and the finished string is
    // reversed once more just before returning.
    const bool mirrored = !m_edges.empty() && m_edges[0]->getDir() == ED_ANTISENSE;

    std::string walkStr;
    const bool includeStartVertex = (type == SGWT_START_TO_END || type == SGWT_INTERNAL);
    if(includeStartVertex)
        walkStr.append(m_pStartVertex->getSeq().toString());

    if(mirrored)
        walkStr = reverse(walkStr);

    // Orientation of the label about to be added relative to the start
    // vertex. The first edge is taken as-is (EC_SAME); each edge whose own
    // comp is not EC_SAME flips the orientation for the edges after it.
    EdgeComp orientation = EC_SAME;

    const size_t numEdges = m_edges.size();
    for(size_t idx = 0; idx < numEdges; ++idx)
    {
        Edge* pEdge = m_edges[idx];

        std::string label = pEdge->getLabel();
        assert(label.size() != 0);

        if(orientation == EC_REVERSE)
            label = reverseComplement(label);

        if(mirrored)
            label = reverse(label);

        walkStr.append(label);

        // Carry the orientation forward to the next edge
        if(pEdge->getComp() != EC_SAME)
            orientation = !orientation;
    }

    // For the internal walk type keep only the span between the two trim
    // offsets computed below. This is done while the string is still in its
    // mirrored form, i.e. before the final reversal.
    if(type == SGWT_INTERNAL)
    {
        Edge* pHeadEdge = getFirstEdge();
        Edge* pTailEdge = getLastEdge();

        // Stays empty unless both edges exist and the offsets leave a
        // non-empty span
        std::string interior;
        if(pHeadEdge != NULL && pTailEdge != NULL)
        {
            Vertex* pFinalVertex = getLastVertex();

            // Left offset: start vertex length minus the first edge's match length
            int trimFrom = m_pStartVertex->getSeqLen() - pHeadEdge->getMatchLength();

            // Right offset: total length minus (last vertex length minus the
            // last edge's match length)
            int trimTo = walkStr.size() - (pFinalVertex->getSeqLen() - pTailEdge->getMatchLength());

            if(trimTo > trimFrom)
                interior = walkStr.substr(trimFrom, trimTo - trimFrom);
        }
        walkStr = interior;
    }

    if(walkStr.empty())
        std::cout << "No output for walk: " << pathSignature() << "\n";

    // Undo the mirroring so the caller receives the final string
    if(mirrored)
        walkStr = reverse(walkStr);

    return walkStr;
}
Example #2
0
// Build the string spelled out by this walk and, when pPlacementVector is
// not NULL, append one SGWalkVertexPlacement per vertex added to the string
// (vertex pointer, 0-based position within the string, and whether the
// vertex is reverse-complemented relative to the string).
//
// For SGWT_START_TO_END and SGWT_INTERNAL the string begins with the full
// sequence of the start vertex; for every other walk type only the edge
// labels are concatenated. Requesting placements together with
// SGWT_INTERNAL is not supported: an error is printed to stderr and the
// process exits. A diagnostic line is written to stdout when the resulting
// string is empty.
std::string SGWalk::getString(SGWalkType type, SGWalkVertexPlacementVector* pPlacementVector) const
{
    const bool wantPlacements = (pPlacementVector != NULL);

    std::string walkStr;

    // Seed the string with the whole start vertex where the walk type asks for it
    if(type == SGWT_START_TO_END || type == SGWT_INTERNAL)
    {
        walkStr.append(m_pStartVertex->getSeq().toString());

        // The start vertex sits at the very beginning, in its own orientation
        if(wantPlacements)
        {
            SGWalkVertexPlacement startPlacement;
            startPlacement.pVertex = m_pStartVertex;
            startPlacement.position = 0; // 0-based coordinates
            startPlacement.isRC = false;
            pPlacementVector->push_back(startPlacement);
        }
    }

    // When the first edge is antisense the string is assembled mirrored:
    // every piece is reversed as it is added and the finished string is
    // reversed once more at the end.
    const bool mirrored = !m_edges.empty() && m_edges[0]->getDir() == ED_ANTISENSE;
    if(mirrored)
        walkStr = reverse(walkStr);

    // Orientation of the label about to be added relative to the start
    // vertex. The first edge is taken as-is (EC_SAME); each edge whose own
    // comp is not EC_SAME flips the orientation for what follows.
    EdgeComp orientation = EC_SAME;

    const size_t numEdges = m_edges.size();
    for(size_t idx = 0; idx < numEdges; ++idx)
    {
        Edge* pEdge = m_edges[idx];

        std::string label = pEdge->getLabel();
        assert(label.size() != 0);

        // Bring the label into the frame of the string being built
        if(orientation == EC_REVERSE)
            label = reverseComplement(label);

        if(mirrored)
            label = reverse(label);

        walkStr.append(label);

        // From here on, orientation describes the vertex this edge ends at
        if(pEdge->getComp() != EC_SAME)
            orientation = !orientation;

        // Record the end vertex, positioned so that it finishes at the
        // current end of the string
        if(wantPlacements)
        {
            SGWalkVertexPlacement placement;
            placement.pVertex = pEdge->getEnd();
            placement.isRC = orientation == EC_REVERSE;
            placement.position = walkStr.size() - pEdge->getEnd()->getSeqLen();
            pPlacementVector->push_back(placement);
        }
    }

    // For the internal walk type keep only the span between the two trim
    // offsets computed below. This is done while the string is still in its
    // mirrored form, i.e. before the final reversal.
    if(type == SGWT_INTERNAL)
    {
        // Placements are not adjusted for the trimming, so reject the combination
        if(wantPlacements)
        {
            std::cerr << "Error: Vertex placement not supported for SGWT_INTERNAL walk types\n";
            exit(EXIT_FAILURE);
        }

        Edge* pHeadEdge = getFirstEdge();
        Edge* pTailEdge = getLastEdge();

        // Stays empty unless both edges exist and the offsets leave a
        // non-empty span
        std::string interior;
        if(pHeadEdge != NULL && pTailEdge != NULL)
        {
            Vertex* pFinalVertex = getLastVertex();

            // Left offset: start vertex length minus the first edge's match length
            int trimFrom = m_pStartVertex->getSeqLen() - pHeadEdge->getMatchLength();

            // Right offset: total length minus (last vertex length minus the
            // last edge's match length)
            int trimTo = walkStr.size() - (pFinalVertex->getSeqLen() - pTailEdge->getMatchLength());

            if(trimTo > trimFrom)
                interior = walkStr.substr(trimFrom, trimTo - trimFrom);
        }
        walkStr = interior;
    }

    if(walkStr.empty())
        std::cout << "No output for walk: " << pathSignature() << "\n";

    if(!mirrored)
        return walkStr;

    // Undo the mirroring so the caller receives the final string
    walkStr = reverse(walkStr);

    // The placements must be mirrored as well: reverse their order and
    // convert each position to be measured from the other end of the string
    if(wantPlacements)
    {
        std::reverse(pPlacementVector->begin(), pPlacementVector->end());

        const size_t numPlacements = pPlacementVector->size();
        for(size_t k = 0; k < numPlacements; ++k)
        {
            SGWalkVertexPlacement& entry = (*pPlacementVector)[k];
            entry.position = walkStr.size() - entry.position - entry.pVertex->getSeqLen();
        }
    }

    return walkStr;
}