Exemplo n.º 1
0
//
// High level merge function that does not specify an edge.
// Looks up the edges leading from vertex id1 to vertex id2 and, when exactly
// one such edge exists, merges along it by calling merge(). When the vertices
// are not connected, or more than one edge joins them, a diagnostic is
// written to std::cerr and the function returns without calling merge().
//
void Bigraph::mergeVertices(VertexID id1, VertexID id2)
{
    Vertex* pVert1 = getVertex(id1);

    // Get the edges from vertex1 to vertex2
    EdgePtrVec edgesTo = pVert1->findEdgesTo(id2);

    if(edgesTo.empty())
    {
        std::cerr << "mergeVertices: vertices are not connected\n";
        return;
    }

    if(edgesTo.size() > 1)
    {
        // More than one candidate edge: there is no way to pick one here
        std::cerr << "mergeVertices: cannot merge because of ambiguous edges\n";
        return;
    }

    // There is a single unique edge between the vertices;
    // call the real merging function on it
    merge(pVert1, edgesTo.front());
}
Exemplo n.º 2
0
// Colour a contained vertex GC_BLACK when one of its neighbours, joined by a
// containment overlap with contained index 0, carries exactly the same
// sequence. `count` is incremented once per vertex coloured this way.
// Always returns false.
bool SGIdenticalRemoveVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
{
    // Only contained vertices are candidates
    if(!pVertex->isContained())
        return false;

    EdgePtrVec candidateEdges = pVertex->getEdges();
    for(EdgePtrVec::iterator it = candidateEdges.begin(); it != candidateEdges.end(); ++it)
    {
        Edge* pCandidate = *it;
        Vertex* pNeighbor = pCandidate->getEnd();

        // Sequences of different lengths cannot be identical
        if(pVertex->getSeqLen() == pNeighbor->getSeqLen())
        {
            Overlap overlap = pCandidate->getOverlap();
            if(overlap.isContainment() &&
               overlap.getContainedIdx() == 0 &&
               pVertex->getSeq() == pNeighbor->getSeq())
            {
                // One identical neighbour is enough; stop looking
                pVertex->setColor(GC_BLACK);
                ++count;
                break;
            }
        }
    }

    return false;
}
Exemplo n.º 3
0
// Simplify the graph by compacting edges in the given direction.
// Every vertex is swept repeatedly; a vertex is merged with its neighbour
// when it has exactly one (non-self) edge in direction `dir` and the twin of
// that edge is the only edge in its own direction on the neighbour.
// Sweeps repeat until one completes without performing a merge.
void Bigraph::simplify(EdgeDir dir)
{
    bool mergedThisPass;
    do
    {
        mergedThisPass = false;
        for(VertexPtrMapIter vIter = m_vertices.begin(); vIter != m_vertices.end(); ++vIter)
        {
            Vertex* pCurr = vIter->second;
            EdgePtrVec dirEdges = pCurr->getEdges(dir);

            // Only a lone edge in this direction can be compacted
            if(dirEdges.size() != 1)
                continue;

            // Singular self edges are never merged
            Edge* pOnly = dirEdges.front();
            if(pOnly->isSelf())
                continue;

            // The edge coming back must be singular as well
            Edge* pBack = pOnly->getTwin();
            Vertex* pFar = pOnly->getEnd();
            if(pFar->countEdges(pBack->getDir()) == 1)
            {
                merge(pCurr, pOnly);
                mergedThisPass = true;
            }
        }
    } while(mergedThisPass);
}
Exemplo n.º 4
0
//
// Flip a vertex
// Currently unimplemented: the live body is a bare assert(false), and the
// previous implementation is kept below, compiled out with #if 0.
// The id parameter is unnamed because the live body never reads it.
//
void Bigraph::flip(VertexID /*id*/)
{
    // Trips whenever this function is called in a build with assertions enabled
    assert(false);
#if 0
    // TODO: update this code
    Vertex* pVertex = getVertex(id);
    EdgePtrVec edges = pVertex->getEdges();

    for(EdgePtrVecIter iter = edges.begin(); iter != edges.end(); ++iter)
    {
        // Get the old twin
        GraphEdgeType twin = iter->getTwin();
        
        GraphEdgeType flipped = *iter; 
        flipped.flip();

        // Remove the edge from the source vertex and add its flipped copy
        pVertex->removeEdge(*iter);
        pVertex->addEdge(flipped);

        // Update the partner by deleting the old twin and adding the twin of the flipped edge
        Vertex* pV2 = getVertex(twin.getStart());
        pV2->removeEdge(twin);
        pV2->addEdge(flipped.getTwin());
    }
#endif
}
Exemplo n.º 5
0
// Mark vertices that look chimeric for removal.
// A vertex is marked when all of the following hold:
//   - it has exactly one sense edge and exactly one antisense edge
//   - its sequence length is at most m_minLength
//   - the vertex at the end of its antisense edge has two or more sense edges
//   - the vertex at the end of its sense edge has two or more antisense edges
//   - among those neighbour edges, at least one has a match length exceeding
//     that of the edge leading to this vertex by _delta or more
// Marked vertices are coloured GC_BLACK and counted in num_chimeric.
// Returns true when the vertex was marked, false otherwise.
bool SGChimericVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
{
    const bool isCandidate = pVertex->countEdges(ED_SENSE) == 1
                          && pVertex->countEdges(ED_ANTISENSE) == 1
                          && pVertex->getSeqLen() <= m_minLength;
    if(!isCandidate)
        return false;

    Edge* pBackEdge = pVertex->getEdges(ED_ANTISENSE)[0];
    Edge* pFwdEdge = pVertex->getEdges(ED_SENSE)[0];
    Vertex* pBackVertex = pBackEdge->getEnd();
    Vertex* pFwdVertex = pFwdEdge->getEnd();

    // Both neighbours must branch
    if(pBackVertex->countEdges(ED_SENSE) < 2)
        return false;
    if(pFwdVertex->countEdges(ED_ANTISENSE) < 2)
        return false;

    // Look for a competing edge on either neighbour that is longer by at least _delta
    bool hasLongerCompetitor = false;

    EdgePtrVec backCompetitors = pBackVertex->getEdges(ED_SENSE);
    for(size_t k = 0; k < backCompetitors.size(); ++k)
    {
        if(backCompetitors[k]->getMatchLength() > pBackEdge->getMatchLength() &&
           backCompetitors[k]->getMatchLength() - pBackEdge->getMatchLength() >= _delta)
        {
            hasLongerCompetitor = true;
        }
    }

    EdgePtrVec fwdCompetitors = pFwdVertex->getEdges(ED_ANTISENSE);
    for(size_t k = 0; k < fwdCompetitors.size(); ++k)
    {
        if(fwdCompetitors[k]->getMatchLength() > pFwdEdge->getMatchLength() &&
           fwdCompetitors[k]->getMatchLength() - pFwdEdge->getMatchLength() >= _delta)
        {
            hasLongerCompetitor = true;
        }
    }

    if(!hasLongerCompetitor)
        return false;

    pVertex->setColor(GC_BLACK);
    ++num_chimeric;
    return true;
}
Exemplo n.º 6
0
float WingedUtil :: averageLength (const WingedMesh& mesh, const EdgePtrVec& edges) {
  assert (edges.size () > 0);

  float l = 0.0f;
  for (const WingedEdge* e : edges) {
    l += e->length (mesh);
  }
  return l / float (edges.size ());
}
Exemplo n.º 7
0
//
// Collect the edges of this vertex that point in direction `dir`.
// The result keeps the relative order the edges have in m_edges.
//
EdgePtrVec Vertex::getEdges(EdgeDir dir) const
{
    EdgePtrVec matching;
    EdgePtrVecConstIter cur = m_edges.begin();
    while(cur != m_edges.end())
    {
        Edge* pCandidate = *cur;
        if(pCandidate->getDir() == dir)
            matching.push_back(pCandidate);
        ++cur;
    }
    return matching;
}
Exemplo n.º 8
0
// Return every edge of this vertex whose end vertex ID equals `id`,
// in the order the edges appear in m_edges.
EdgePtrVec Vertex::findEdgesTo(VertexID id)
{
    EdgePtrVec hits;
    for(EdgePtrVecConstIter pos = m_edges.begin(); pos != m_edges.end(); ++pos)
    {
        Edge* pCandidate = *pos;
        if(pCandidate->getEndID() == id)
        {
            hits.push_back(pCandidate);
        }
    }
    return hits;
}
Exemplo n.º 9
0
 // Gather pointers to edges adjacent to the faces in this->faces.
 // An edge is taken from a face when that face is the edge's left face, or
 // when the edge's other face is not a member of this->faces.
 EdgePtrVec toEdgeVec () const {
   EdgePtrVec collected;
   for (WingedFace* face : this->faces) {
     for (WingedEdge& edge : face->adjacentEdges ()) {
       // Skip edges that are neither led by this face nor on the boundary of the set
       if (! edge.isLeftFace (*face) && this->faces.count (edge.otherFace (*face)) != 0) {
         continue;
       }
       collected.push_back (&edge);
     }
   }
   return collected;
 }
Exemplo n.º 10
0
//
// SGOverlapWriterVisitor - write all the overlaps in the graph to a file.
// For the visited vertex, each edge's overlap is streamed to m_fileHandle on
// its own line, but only when id[0] < id[1] (presumably so that an overlap
// seen from both of its endpoints is written just once).
// Always returns false.
//
bool SGOverlapWriterVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
{
    EdgePtrVec vertexEdges = pVertex->getEdges();
    for(EdgePtrVec::iterator it = vertexEdges.begin(); it != vertexEdges.end(); ++it)
    {
        Overlap overlap = (*it)->getOverlap();
        if(!(overlap.id[0] < overlap.id[1]))
            continue;
        m_fileHandle << overlap << "\n";
    }
    return false;
}
Exemplo n.º 11
0
//
// Merge two vertices along the specified edge.
// V2 is the end vertex of pEdge. The steps are:
//   1. pV1 absorbs V2's data through Vertex::merge(pEdge)
//   2. V2's edges in the direction opposite to pEdge's twin are detached
//      from V2, joined onto pEdge and added to pV1
//   3. pEdge and its twin are detached from their vertices and deleted
//   4. V2 is handed to removeIslandVertex()
// Both pEdge and its twin are destroyed by this call; callers must not use
// either pointer afterwards.
//
void Bigraph::merge(Vertex* pV1, Edge* pEdge)
{
    Vertex* pV2 = pEdge->getEnd();
    //std::cout << "Merging " << pV1->getID() << " with " << pV2->getID() << "\n";

    // Merge the data
    pV1->merge(pEdge);

    // Get the twin edge (the edge in v2 that points to v1)
    Edge* pTwin = pEdge->getTwin();

    // Ensure v2 has the twin edge
    assert(pV2->hasEdge(pTwin));

    // Get the edge set opposite of the twin edge (which will be the new edges in this direction for V1)
    EdgePtrVec transEdges = pV2->getEdges(!pTwin->getDir());

    // Move the edges from pV2 to pV1
    for(EdgePtrVecIter iter = transEdges.begin(); iter != transEdges.end(); ++iter)
    {
        Edge* pTransEdge = *iter;

        // Remove the edge from V2, this does not destroy the edge
        pV2->removeEdge(pTransEdge);

        // Join pEdge to the start of transEdge
        // This updates the starting point of pTransEdge to be V1
        // This calls Edge::extend on the twin edge
        pTransEdge->join(pEdge);
        assert(pTransEdge->getDir() == pEdge->getDir());
        pV1->addEdge(pTransEdge); // add to V1

        // Notify the edges they have been updated
        pTransEdge->update();
        pTransEdge->getTwin()->update();
    }

    // Remove the edge from pV1 to pV2
    pV1->removeEdge(pEdge);
    delete pEdge;
    pEdge = 0;

    // Remove the edge from pV2 to pV1
    pV2->removeEdge(pTwin);
    delete pTwin;
    pTwin = 0; // null the pointer that was just deleted

    // Remove V2
    // It is guaranteed to not be connected
    removeIslandVertex(pV2);
    //validate();
}
// If pVertex is flagged as contained, clear that flag and then flag as
// contained the end vertex of every edge of pVertex whose overlap is a
// containment. Does nothing when pVertex is not flagged.
void StringGraphGenerator::resetContainmentFlags(Vertex* pVertex)
{
    if(pVertex->isContained())
    {
        pVertex->setContained(false);

        EdgePtrVec incident = pVertex->getEdges();
        for(EdgePtrVec::iterator it = incident.begin(); it != incident.end(); ++it)
        {
            Edge* pCurr = *it;
            if(!pCurr->getOverlap().isContainment())
                continue;
            pCurr->getEnd()->setContained(true);
        }
    }
}
Exemplo n.º 13
0
// For each direction, compare every edge of pVertex against the first edge
// returned for that direction. An edge whose match length differs from the
// first edge's is kept only when the far vertex has an edge (among those that
// survive the per-direction predicate filter) ending at pVertex and the match
// length gap to the first edge is at most _delta. Any other such edge, and
// its twin, is coloured GC_BLACK.
// Returns true when at least one edge was coloured.
bool SGMaximalOverlapVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
{
    typedef bool(*PredicateEdge)(const Edge*);
    PredicateEdge dirFilters[ED_COUNT] = {SGMaximalOverlapVisitor::isSenseEdge, SGMaximalOverlapVisitor::isAntiSenseEdge};

    bool anyMarked = false;
    for(size_t idx = 0; idx < ED_COUNT; idx++)
    {
        // These edges are already sorted by overlap length
        EdgePtrVec dirEdges = pVertex->getEdges(EDGE_DIRECTIONS[idx]);
        if(dirEdges.empty())
            continue;

        Edge* pBest = dirEdges[0];
        for(size_t i = 1; i < dirEdges.size(); ++i)
        {
            Edge* pCandidate = dirEdges[i];

            // Ties with the first edge are always kept
            if(pCandidate->getMatchLength() == pBest->getMatchLength())
                continue;

            // Edges of the far vertex, minus those removed by this direction's predicate
            EdgePtrVec backEdges = pCandidate->getEnd()->getEdges();
            backEdges.erase(std::remove_if(backEdges.begin(), backEdges.end(), dirFilters[idx]),
                            backEdges.end());
            assert(!backEdges.empty());

            bool keep = false;
            for(size_t j = 0; j < backEdges.size(); ++j)
            {
                if(backEdges[j]->getEndID() == pVertex->getID() &&
                   pBest->getMatchLength() - pCandidate->getMatchLength() <= _delta)
                {
                    keep = true;
                }
            }

            if(keep)
                continue;

            pCandidate->setColor(GC_BLACK);
            pCandidate->getTwin()->setColor(GC_BLACK);
            anyMarked = true;
        }
    }

    return anyMarked;
}
Exemplo n.º 14
0
// Bring the edges of pVertex in line with the overlap map computed for it.
// Existing edges absent from the map are coloured GC_BLACK (with their
// twins); map entries with no existing edge are created in the graph.
// The containment flags are then refreshed from the containment map.
// Returns true when at least one edge was created.
bool SGRemodelVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
{
    typedef SGAlgorithms::EdgeDescOverlapMap OverlapMap;

    // Construct the set of overlaps reachable within the current parameters
    CompleteOverlapSet vertexOverlapSet(pVertex, m_remodelER, pGraph->getMinOverlap());
    OverlapMap containMap;
    vertexOverlapSet.computeIrreducible(NULL, &containMap);
    OverlapMap pending = vertexOverlapSet.getOverlapMap();

    // Reconcile existing edges against the map; whatever is left in
    // `pending` afterwards are the edges that still have to be added
    EdgePtrVec existing = pVertex->getEdges();
    for(size_t i = 0; i < existing.size(); ++i)
    {
        Edge* pEdge = existing[i];
        OverlapMap::iterator hit = pending.find(pEdge->getDesc());
        if(hit == pending.end())
        {
            // Not wanted any more: mark the edge and its twin
            pEdge->setColor(GC_BLACK);
            pEdge->getTwin()->setColor(GC_BLACK);
        }
        else
        {
            // Edge exists already
            pending.erase(hit);
        }
    }

    // Add remaining edges
    bool graph_changed = false;
    for(OverlapMap::iterator it = pending.begin(); it != pending.end(); ++it)
    {
        SGAlgorithms::createEdgesFromOverlap(pGraph, it->second, false);
        graph_changed = true;
    }

    // Update the containment flags in the graph to ensure that we can subsequently remove containment verts
    SGAlgorithms::updateContainFlags(pGraph, pVertex, containMap);

    return graph_changed;
}
Exemplo n.º 15
0
// Detach a contained vertex from the graph and colour it GC_BLACK.
// When the graph reports neither hasTransitive() nor isExactMode(), each
// neighbour is first remodelled for the excision (in EdgeLenComp order).
// Every edge of pVertex, and its twin, is then deleted.
// Always returns false; vertices that are not contained are left untouched.
bool SGContainRemoveVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
{
    if(!pVertex->isContained())
        return false;

    EdgePtrVec incident = pVertex->getEdges();

    // If the graph has been transitively reduced, the neighbours have to be
    // checked for new edges that become irreducible once this vertex is gone.
    // For a complete overlap graph the edges can simply be removed.
    const bool skipRemodel = pGraph->hasTransitive() || pGraph->isExactMode();
    if(!skipRemodel)
    {
        // This must be done in order of edge length or some transitive edges
        // may be created
        std::sort(incident.begin(), incident.end(), EdgeLenComp());

        for(EdgePtrVec::iterator it = incident.begin(); it != incident.end(); ++it)
        {
            Vertex* pNeighbor = (*it)->getEnd();
            Edge* pBackEdge = (*it)->getTwin();
            SGAlgorithms::remodelVertexForExcision(pGraph, pNeighbor, pBackEdge);
        }
    }

    // Delete the edges from the graph, twin first
    for(EdgePtrVec::iterator it = incident.begin(); it != incident.end(); ++it)
    {
        Edge* pOutEdge = *it;
        Vertex* pNeighbor = pOutEdge->getEnd();
        Edge* pBackEdge = pOutEdge->getTwin();
        pNeighbor->deleteEdge(pBackEdge);
        pVertex->deleteEdge(pOutEdge);
    }

    pVertex->setColor(GC_BLACK);
    return false;
}
Exemplo n.º 16
0
// Construct the walk structure from a vector of edges.
// edgeVec must not be empty (asserted). A WalkIndex is allocated only when
// bIndexWalk is true; otherwise m_pWalkIndex is NULL. The walk starts at the
// start vertex of the first edge, and the edges are appended in order.
SGWalk::SGWalk(const EdgePtrVec& edgeVec, bool bIndexWalk) : m_extensionDistance(0), m_extensionFinished(false)
{
    assert(!edgeVec.empty());

    m_pWalkIndex = NULL;
    if(bIndexWalk)
        m_pWalkIndex = new WalkIndex;

    // The start vertex is the start vertex of the first edge
    m_pStartVertex = edgeVec.front()->getStart();

    for(size_t i = 0; i < edgeVec.size(); ++i)
        addEdge(edgeVec[i]);
}
Exemplo n.º 17
0
//
// SGPairedOverlapVisitor - print a formatted report to stdout
// detailing how much overlap there is between both end of a paired
// read.
//
// NOTE: the reporting code below is compiled out with #if 0, so as built this
// visitor does nothing and always returns false. Both parameters are unnamed
// because the live body never reads them.
//
bool SGPairedOverlapVisitor::visit(StringGraph* /*pGraph*/, Vertex* /*pVertex*/)
{
#if 0
    // Vertices without a pair vertex are skipped
    Vertex* pPairSV = pVertex->getPairVertex();
    if(pPairSV == NULL)
        return false;

    EdgePtrVec edges = pVertex->getEdges();

    // Determine which vertices that are paired to pVertex
    // have a pair that overlaps with pPairVertex
    for(size_t i = 0; i < edges.size(); ++i)
    {
        Edge* pVWEdge = edges[i];
        Vertex* pW = pVWEdge->getEnd();
        Vertex* pPairW = pW->getPairVertex();
        if(pPairW == NULL)
            continue;

        // Edges from the pair of W to the pair of pVertex
        EdgePtrVec ppw_edges = pPairW->findEdgesTo(pPairSV->getID());
        size_t overlap_len = pVWEdge->getMatchLength();

        // Only edges with comp EC_SAME are reported
        if(pVWEdge->getComp() == EC_SAME)
        {
            if(ppw_edges.size() == 1)
            {
                // Exactly one edge between the pairs: report both overlap lengths
                Edge* pPPEdge = ppw_edges.front();
                size_t pair_overlap_len = pPPEdge->getMatchLength();
                printf("pairoverlap\t%s\t%s\t%zu\t%zu\n", pVertex->getID().c_str(), pW->getID().c_str(), overlap_len, pair_overlap_len);
            }
            else
            {
                // Zero or several edges between the pairs: report a pair overlap of 0
                printf("pairoverlap\t%s\t%s\t%zu\t%d\n", pVertex->getID().c_str(), pW->getID().c_str(), overlap_len, 0);
            }
        }
    }
#endif
    return false;
}
Exemplo n.º 18
0
// Resolve conflicts between trusted and untrusted edges.
// For each direction in which pVertex has more than one edge and at least
// one of them has isTrusted set, every edge without isTrusted is coloured
// GC_BLACK together with its twin. For those directions, num_same / num_diff
// count every edge by whether its comp is EC_SAME.
// Always returns false.
bool SGPEConflictRemover::visit(StringGraph* pGraph, Vertex* pVertex)
{
    (void)pGraph;
    for(size_t idx = 0; idx < ED_COUNT; idx++)
    {
        EdgePtrVec dirEdges = pVertex->getEdges(EDGE_DIRECTIONS[idx]);

        // A single edge (or none) cannot conflict with anything
        if(dirEdges.size() <= 1)
            continue;

        bool anyTrusted = false;
        for(size_t j = 0; j < dirEdges.size(); ++j)
        {
            if(dirEdges[j]->isTrusted)
            {
                anyTrusted = true;
                break;
            }
        }

        if(!anyTrusted)
            continue;

        for(size_t j = 0; j < dirEdges.size(); ++j)
        {
            Edge* pCurr = dirEdges[j];
            if(!pCurr->isTrusted)
            {
                pCurr->setColor(GC_BLACK);
                pCurr->getTwin()->setColor(GC_BLACK);
            }

            if(pCurr->getComp() == EC_SAME)
                num_same++;
            else
                num_diff++;
        }
    }
    return false;
}
Exemplo n.º 19
0
// Accumulate overlap statistics for pVertex.
// Overlaps present as edges of pVertex are added to foundCounts. Overlaps
// proposed by getMissingCandidates() whose error rate is below the graph's
// error rate are added to missingCounts. A progress line is printed to
// std::cout every 50000 calls.
// Always returns false.
bool SGEdgeStatsVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
{
    const int MIN_OVERLAP = pGraph->getMinOverlap();
    const double MAX_ERROR = pGraph->getErrorRate();

    // Call counter shared by all invocations (function-local static)
    static int visited = 0;
    ++visited;
    if(visited % 50000 == 0)
        std::cout << "visited: " << visited << "\n";

    // Add stats for the found overlaps
    EdgePtrVec edges = pVertex->getEdges();
    for(size_t i = 0; i < edges.size(); ++i)
    {
        Overlap ovr = edges[i]->getOverlap();
        int numDiff = ovr.match.countDifferences(pVertex->getStr(), edges[i]->getEnd()->getStr());
        int overlapLen = ovr.match.getMinOverlapLength();
        addOverlapToCount(overlapLen, numDiff, foundCounts);
    }

    // Explore the neighborhood around this vertex for potentially missing overlaps
    CandidateVector candidates = getMissingCandidates(pGraph, pVertex, MIN_OVERLAP);
    for(size_t i = 0; i < candidates.size(); ++i)
    {
        Candidate& c = candidates[i];
        int numDiff = c.ovr.match.countDifferences(pVertex->getStr(), c.pEndpoint->getStr());

        // Fetch the overlap length once; it feeds both the error rate and the count
        int overlapLen = c.ovr.match.getMinOverlapLength();
        double error_rate = double(numDiff) / double(overlapLen);

        if(error_rate < MAX_ERROR)
            addOverlapToCount(overlapLen, numDiff, missingCounts);
    }

    return false;
}
Exemplo n.º 20
0
// Recursively copy the neighbourhood of pCurrVertex into pSubgraph.
// For every edge of pCurrVertex not yet coloured GC_BLACK, the end vertex is
// copied into the subgraph, edges are created there from the edge's overlap,
// the edge and its twin are coloured GC_BLACK, and the function recurses on
// the end vertex with span reduced by one. Nothing happens once span <= 0.
void addNeighborsToSubgraph(Vertex* pCurrVertex, StringGraph* pSubgraph, int span)
{
    if(span > 0)
    {
        // These are the edges in the main graph
        EdgePtrVec mainEdges = pCurrVertex->getEdges();
        for(EdgePtrVec::iterator it = mainEdges.begin(); it != mainEdges.end(); ++it)
        {
            Edge* pEdge = *it;

            // Black edges have been handled already
            if(pEdge->getColor() == GC_BLACK)
                continue;

            Vertex* pNeighbor = pEdge->getEnd();
            copyVertexToSubgraph(pSubgraph, pNeighbor);

            Overlap edgeOverlap = pEdge->getOverlap();
            SGAlgorithms::createEdgesFromOverlap(pSubgraph, edgeOverlap, true);

            pEdge->setColor(GC_BLACK);
            pEdge->getTwin()->setColor(GC_BLACK);

            // Recurse
            addNeighborsToSubgraph(pNeighbor, pSubgraph, span - 1);
        }
    }
}
Exemplo n.º 21
0
// Explore the neighborhood around a vertex looking for missing overlaps.
// pVertex and its direct neighbours are temporarily coloured GC_BLACK. Each
// vertex Z reached through a neighbour Y that is not black becomes a
// candidate when the X->Y and Y->Z overlaps have a transitive overlap whose
// minimum overlap length is at least minOverlap; Z is then coloured black so
// it is reported only once. All colours set here are reset to GC_WHITE
// before returning.
SGEdgeStatsVisitor::CandidateVector SGEdgeStatsVisitor::getMissingCandidates(StringGraph* /*pGraph*/, 
                                                                             Vertex* pVertex, 
                                                                             int minOverlap) const
{
    CandidateVector found;
    EdgePtrVec directEdges = pVertex->getEdges();

    // Black marks the vertices that already overlap pVertex
    for(size_t i = 0; i < directEdges.size(); ++i)
        directEdges[i]->getEnd()->setColor(GC_BLACK);
    pVertex->setColor(GC_BLACK);

    for(size_t i = 0; i < directEdges.size(); ++i)
    {
        Edge* pXY = directEdges[i];
        EdgePtrVec secondHop = pXY->getEnd()->getEdges();
        for(size_t j = 0; j < secondHop.size(); ++j)
        {
            Edge* pYZ = secondHop[j];
            Vertex* pZ = pYZ->getEnd();
            if(pZ->getColor() == GC_BLACK)
                continue;

            // Infer the overlap object from the edges
            Overlap ovrXY = pXY->getOverlap();
            Overlap ovrYZ = pYZ->getOverlap();
            if(!SGAlgorithms::hasTransitiveOverlap(ovrXY, ovrYZ))
                continue;

            Overlap ovrXZ = SGAlgorithms::inferTransitiveOverlap(ovrXY, ovrYZ);
            if(ovrXZ.match.getMinOverlapLength() >= minOverlap)
            {
                found.push_back(Candidate(pZ, ovrXZ));
                pZ->setColor(GC_BLACK);
            }
        }
    }

    // Reset colors
    for(size_t i = 0; i < directEdges.size(); ++i)
        directEdges[i]->getEnd()->setColor(GC_WHITE);
    pVertex->setColor(GC_WHITE);
    for(size_t i = 0; i < found.size(); ++i)
        found[i].pEndpoint->setColor(GC_WHITE);

    return found;
}
Exemplo n.º 22
0
// Find bubbles (nodes where there is a split and then immediate rejoin) and mark them for removal
//
// For each direction in which pVertex has more than one edge, three passes are
// made over those edges:
//   1. check  - give up (return false) if a neighbour, or the single vertex a
//               neighbour leads on to, is already coloured GC_RED
//   2. mark   - colour the vertex each single-exit neighbour leads to GC_BLACK;
//               a neighbour that leads to an already-black vertex is coloured
//               GC_RED and counts as a bubble
//   3. unmark - reset the GC_BLACK / GC_BLUE working colours to GC_WHITE,
//               leaving GC_RED neighbours in place
// Returns bubble_found.
//
// NOTE(review): bubble_found is not reset between directions, so a bubble found
// in the first direction appears to increment num_bubbles again when the second
// direction also has more than one edge - confirm whether that is intended.
// NOTE(review): the early "return false" in the check pass also discards a
// bubble already found in the other direction - confirm.
bool SGBubbleVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
{
    bool bubble_found = false;
    for(size_t idx = 0; idx < ED_COUNT; idx++)
    {
        EdgeDir dir = EDGE_DIRECTIONS[idx];
        EdgePtrVec edges = pVertex->getEdges(dir);
        if(edges.size() > 1)
        {
            // pStart / pEnd are only consumed by the disabled reporting code below
            Vertex* pStart = pVertex;
            Vertex* pEnd = NULL;
            // Check the vertices
            for(size_t i = 0; i < edges.size(); ++i)
            {
                Edge* pVWEdge = edges[i];
                Vertex* pWVert = pVWEdge->getEnd();

                // Get the edges from w in the same direction
                EdgeDir transDir = !pVWEdge->getTwinDir();
                EdgePtrVec wEdges = pWVert->getEdges(transDir);

                // A red neighbour has already been marked; leave this vertex alone
                if(pWVert->getColor() == GC_RED)
                    return false;

                // If the bubble has collapsed, there should only be one edge
                if(wEdges.size() == 1)
                {
                    Vertex* pBubbleEnd = wEdges.front()->getEnd();
                    if(pBubbleEnd->getColor() == GC_RED)
                        return false;
                }
            }

            // Mark the vertices
            for(size_t i = 0; i < edges.size(); ++i)
            {
                Edge* pVWEdge = edges[i];
                Vertex* pWVert = pVWEdge->getEnd();

                // Get the edges from w in the same direction
                EdgeDir transDir = !pVWEdge->getTwinDir();
                EdgePtrVec wEdges = pWVert->getEdges(transDir);

                // If the bubble has collapsed, there should only be one edge
                if(wEdges.size() == 1)
                {
                    Vertex* pBubbleEnd = wEdges.front()->getEnd();
                    if(pBubbleEnd->getColor() == GC_BLACK)
                    {
                        // The endpoint has been visited, set this vertex as needing removal
                        // and set the endpoint as unvisited
                        pWVert->setColor(GC_RED);
                        bubble_found = true;
                        pEnd = pBubbleEnd;
                    }
                    else
                    {
                        // First path seen to this endpoint: remember it (black)
                        // and tag the neighbour with the temporary blue colour
                        pBubbleEnd->setColor(GC_BLACK);
                        pWVert->setColor(GC_BLUE);
                    }
                }
            }
            
            // Unmark vertices
            for(size_t i = 0; i < edges.size(); ++i)
            {
                Edge* pVWEdge = edges[i];
                Vertex* pWVert = pVWEdge->getEnd();

                // Get the edges from w in the same direction
                EdgeDir transDir = !pVWEdge->getTwinDir();
                EdgePtrVec wEdges = pWVert->getEdges(transDir);

                // If the bubble has collapsed, there should only be one edge
                if(wEdges.size() == 1)
                {
                    Vertex* pBubbleEnd = wEdges.front()->getEnd();
                    pBubbleEnd->setColor(GC_WHITE);
                }
                // Blue was only a working colour; red neighbours keep their mark
                if(pWVert->getColor() == GC_BLUE)
                    pWVert->setColor(GC_WHITE);
            }

            // Silence unused-variable warnings while the reporting code is disabled
            (void)pStart;
            (void)pEnd;
            if(bubble_found)
            {
                /*
                SGWalkVector walkVector;
                SGSearch::findWalks(pStart, pEnd, dir, 1000, 20, walkVector);
                
                if(walkVector.size() == 2)
                {
                    SGWalk& walk1 = walkVector[0];
                    SGWalk& walk2 = walkVector[1];

                    int len1 = walk1.getStartToEndDistance();
                    int len2 = walk2.getStartToEndDistance();
                    int diff = len1 - len2;
                    std::string type = "SNP";
                    if(diff != 0)
                    {
                        type = "INDEL";
                    }
                    std::cout << "Bubble " << pStart->getID() << " to " << pEnd->getID() << " is a "
                              << type << "(d: " << diff << ")\n";
                }
                */
                ++num_bubbles;
            }
        }
    }
    return bubble_found;
}
Exemplo n.º 23
0
// Find bubbles (nodes where there is a split and then immediate rejoin) and mark them for removal.
//
// Only directions in which pX has exactly two edges are examined. Of the two
// edges, the one with the longer overlap on pX is called X->Y and the other
// X->Z. The neighbours of Z (in the transitive direction of X->Z) form a
// target list. Starting from Y, overlaps are extended transitively, for a
// bounded number of steps, and every vertex reached is struck off the target
// list. If the list empties, all edges of Z are deleted and Z is coloured
// GC_RED.
//
// Returns true if a bubble was found in either direction; num_bubbles is
// incremented once per bubble.
bool SGBubbleEdgeVisitor::visit(StringGraph* /*pGraph*/, Vertex* pX)
{
    bool bubble_found = false;
    for(size_t idx = 0; idx < ED_COUNT; idx++)
    {
        EdgeDir dir = EDGE_DIRECTIONS[idx];
        EdgePtrVec edges = pX->getEdges(dir);
        if(edges.size() != 2) // di-bubbles only for now
            continue;

        // Determine which edge has a shorter overlap to pX
        // Call the end of the longer overlap Y, the end of the shorter Z
        Edge* pXY;
        Edge* pXZ;

        if(edges[0]->getOverlap().getOverlapLength(0) > edges[1]->getOverlap().getOverlapLength(0))
        {
            pXY = edges[0];
            pXZ = edges[1];
        }
        else if(edges[1]->getOverlap().getOverlapLength(0) > edges[0]->getOverlap().getOverlapLength(0))
        {
            pXY = edges[1];
            pXZ = edges[0];
        }
        else
        {
            // Equal length overlaps, cannot be a bubble or else the vertices
            // would be contained. This rules out only the current direction,
            // so carry on with the next one rather than leaving the loop.
            continue;
        }

        // Mark the neighbors of Z as the "target" vertices;
        // if all of them can be reached from Y, Z is removed
        typedef std::list<Vertex*> VertexPtrList;
        VertexPtrList targetList;

        EdgeDir targetDir = pXZ->getTransitiveDir();
        EdgePtrVec targetEdges = pXZ->getEnd()->getEdges(targetDir);
        for(size_t i = 0; i < targetEdges.size(); ++i)
            targetList.push_back(targetEdges[i]->getEnd());

        // Start exploring from Y
        ExploreQueue queue;
        Overlap ovrXY = pXY->getOverlap();
        EdgeDesc edXY = pXY->getDesc();
        queue.push(ExploreElement(edXY, ovrXY));

        int numSteps = 100;
        WARN_ONCE("USING FIXED NUMBER OF STEPS IN BUBBLE EDGE");
        while(!queue.empty() && numSteps-- > 0)
        {
            // Work on a copy of the front element so the references below
            // stay valid after the pop
            ExploreElement ee = queue.front();
            EdgeDesc& edCurr = ee.ed;
            Vertex* pY = edCurr.pVertex;
            Overlap& ovrCurr = ee.ovr;

            queue.pop();

            // Strike the current vertex off the target list, if present
            targetList.remove(pY);

            if(targetList.empty())
                break;

            // Enqueue the neighbors of the current vertex
            EdgeDir dirY = edCurr.getTransitiveDir();
            EdgePtrVec nextEdges = pY->getEdges(dirY);
            for(size_t i = 0; i < nextEdges.size(); ++i)
            {
                Edge* pEdge = nextEdges[i];
                Vertex* pZ = pEdge->getEnd();

                // Compute the edgeDesc and overlap on pX for this edge
                Overlap ovrYZ = pEdge->getOverlap();

                if(SGAlgorithms::hasTransitiveOverlap(ovrCurr, ovrYZ))
                {
                    Overlap ovrXZ = SGAlgorithms::inferTransitiveOverlap(ovrCurr, ovrYZ);
                    EdgeDesc edXZ = SGAlgorithms::overlapToEdgeDesc(pZ, ovrXZ);
                    queue.push(ExploreElement(edXZ, ovrXZ));
                }
            }
        }

        if(targetList.empty())
        {
            // bubble found
            pXZ->getEnd()->deleteEdges();
            pXZ->getEnd()->setColor(GC_RED);
            bubble_found = true;
            ++num_bubbles;
        }
    }
    return bubble_found;
}
Exemplo n.º 24
0
// Mark transitive edges of pVertex for removal.
//
// For each direction the end vertices of pVertex's edges are first coloured
// GC_GRAY. Two stages then recolour to GC_BLACK those neighbours that are also
// reached through another neighbour. Finally every edge whose end vertex is
// black is coloured GC_BLACK together with its twin, and all neighbour colours
// are reset to GC_WHITE.
//
// marked_edges grows by 2 for each newly marked edge/twin pair; marked_verts
// grows by 1 when this vertex had at least one edge marked.
// Always returns false.
//
// NOTE(review): the early "break"s in both stages rely on the edge vectors
// being ordered by getSeqLen() (see the "already sorted" remark) - confirm.
bool SGTransitiveReductionVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
{
    size_t trans_count = 0;
    static const size_t FUZZ = 10; // see myers

    for(size_t idx = 0; idx < ED_COUNT; idx++)
    {
        EdgeDir dir = EDGE_DIRECTIONS[idx];
        EdgePtrVec edges = pVertex->getEdges(dir); // These edges are already sorted
        if(edges.size() == 0)
            continue;

        // Gray = direct neighbour of pVertex, not (yet) known to be transitive
        for(size_t i = 0; i < edges.size(); ++i)
            (edges[i])->getEnd()->setColor(GC_GRAY);

        // Upper bound for two-edge paths considered in stage 1:
        // the sequence length of the last edge plus FUZZ
        Edge* pLongestEdge = edges.back();
        size_t longestLen = pLongestEdge->getSeqLen() + FUZZ;
        
        // Stage 1
        // For each neighbour W that is still gray, follow W's edges while the
        // combined V->W->X length stays within longestLen; any gray X reached
        // this way is transitive
        for(size_t i = 0; i < edges.size(); ++i)
        {
            Edge* pVWEdge = edges[i];
            Vertex* pWVert = pVWEdge->getEnd();

            EdgeDir transDir = !pVWEdge->getTwinDir();
            if(pWVert->getColor() == GC_GRAY)
            {
                EdgePtrVec w_edges = pWVert->getEdges(transDir);
                for(size_t j = 0; j < w_edges.size(); ++j)
                {
                    Edge* pWXEdge = w_edges[j];
                    size_t trans_len = pVWEdge->getSeqLen() + pWXEdge->getSeqLen();
                    if(trans_len <= longestLen)
                    {
                        if(pWXEdge->getEnd()->getColor() == GC_GRAY)
                        {
                            // X is the endpoint of an edge of V, therefore it is transitive
                            pWXEdge->getEnd()->setColor(GC_BLACK);
                        }
                    }
                    else
                        break;
                }
            }
        }
        
        // Stage 2
        // For every neighbour W (whatever its colour), look at W's first edge
        // and at any following edges shorter than FUZZ
        for(size_t i = 0; i < edges.size(); ++i)
        {
            Edge* pVWEdge = edges[i];
            Vertex* pWVert = pVWEdge->getEnd();

            EdgeDir transDir = !pVWEdge->getTwinDir();
            EdgePtrVec w_edges = pWVert->getEdges(transDir);
            for(size_t j = 0; j < w_edges.size(); ++j)
            {
                Edge* pWXEdge = w_edges[j];
                size_t len = pWXEdge->getSeqLen();

                if(len < FUZZ || j == 0)
                {
                    if(pWXEdge->getEnd()->getColor() == GC_GRAY)
                    {
                        // X is the endpoint of an edge of V, therefore it is transitive
                        pWXEdge->getEnd()->setColor(GC_BLACK);
                    }
                }
                else
                {
                    break;
                }
            }
        }

        // Translate vertex colours into edge marks, then reset the vertex colours
        for(size_t i = 0; i < edges.size(); ++i)
        {
            if(edges[i]->getEnd()->getColor() == GC_BLACK)
            {
                // Mark the edge and its twin for removal
                // (skipped when both are already black, so nothing is counted twice)
                if(edges[i]->getColor() != GC_BLACK || edges[i]->getTwin()->getColor() != GC_BLACK)
                {
                    edges[i]->setColor(GC_BLACK);
                    edges[i]->getTwin()->setColor(GC_BLACK);
                    marked_edges += 2;
                    trans_count++;
                }
            }
            edges[i]->getEnd()->setColor(GC_WHITE);
        }
    }

    if(trans_count > 0)
        ++marked_verts;

    return false;
}
Exemplo n.º 25
0
//
// SGSmoothingVisitor - collapse sets of alternative walks ("variant walks")
// that leave pVertex in the same direction.
//
// For every direction in which pVertex has more than one edge:
//  1. SGSearch::findVariantWalks is called with the MAX_DISTANCE / MAX_WALKS limits.
//  2. The walk whose internal vertices have the highest summed coverage is selected.
//  3. The internal (non-overlapping) string of every other walk is aligned to the
//     selected walk's internal string; total divergence, gap divergence and the
//     largest single indel are compared against m_maxTotalDivergence,
//     m_maxGapDivergence and m_maxIndelLength.
//  4. If every walk passes, all walks are written to m_outFile and the internal
//     vertices of the non-selected walks that are not also on the selected walk
//     are colored GC_RED.
//
// pGraph is unused.
//
// Returns false immediately if pVertex, or any neighbor in a branching direction,
// is already GC_RED. Otherwise returns true if variant walks were found in at
// least one direction (even when the divergence checks rejected them).
//
bool SGSmoothingVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
{
    (void)pGraph;

    // GC_RED is the color given below to vertices on discarded walks
    if(pVertex->getColor() == GC_RED)
        return false;

    const int MAX_WALKS = 10;
    const int MAX_DISTANCE = 5000;

    bool found = false;
    for(size_t idx = 0; idx < ED_COUNT; ++idx)
    {
        EdgeDir dir = EDGE_DIRECTIONS[idx];
        EdgePtrVec edges = pVertex->getEdges(dir);

        // Only a branching vertex can be the start of a bubble
        if(edges.size() <= 1)
            continue;

        for(size_t i = 0; i < edges.size(); ++i)
        {
            if(edges[i]->getEnd()->getColor() == GC_RED)
                return false;
        }

        SGWalkVector variantWalks;
        SGSearch::findVariantWalks(pVertex, dir, MAX_DISTANCE, MAX_WALKS, variantWalks);
        if(variantWalks.size() == 0)
            continue;

        found = true;
        const size_t numWalks = variantWalks.size();

        bool bIsDegenerate = false;
        bool bFailGapCheck = false;
        bool bFailDivergenceCheck = false;
        bool bFailIndelSizeCheck = false;

        size_t selectedIdx = static_cast<size_t>(-1);
        size_t selectedCoverage = 0;

        // Calculate the minimum amount overlapped on the start/end vertex.
        // This is used to properly extract the sequences from walks that represent the variation.
        int minOverlapX = std::numeric_limits<int>::max();
        int minOverlapY = std::numeric_limits<int>::max();

        for(size_t i = 0; i < numWalks; ++i)
        {
            if(variantWalks[i].getNumEdges() <= 1)
                bIsDegenerate = true;

            // Calculate the walk coverage using the internal vertices of the walk.
            // The walk with the highest coverage will be retained.
            // The bound is written as j + 1 < n so that it cannot wrap around
            // if the vertex count is an unsigned zero.
            const size_t numVertices = variantWalks[i].getNumVertices();
            size_t walkCoverage = 0;
            for(size_t j = 1; j + 1 < numVertices; ++j)
                walkCoverage += variantWalks[i].getVertex(j)->getCoverage();

            // The "== 0" clause guarantees that some walk is always selected,
            // even when every walk has zero internal coverage
            if(walkCoverage > selectedCoverage || selectedCoverage == 0)
            {
                selectedIdx = i;
                selectedCoverage = walkCoverage;
            }

            const int firstOverlap = static_cast<int>(variantWalks[i].getFirstEdge()->getMatchLength());
            const int lastOverlap = static_cast<int>(variantWalks[i].getLastEdge()->getTwin()->getMatchLength());

            if(firstOverlap < minOverlapX)
                minOverlapX = firstOverlap;

            if(lastOverlap < minOverlapY)
                minOverlapY = lastOverlap;
        }

        // Calculate the strings for each walk that represent the region of variation
        StringVector walkStrings;
        for(size_t i = 0; i < numWalks; ++i)
        {
            Vertex* pStartVertex = variantWalks[i].getStartVertex();
            Vertex* pLastVertex = variantWalks[i].getLastVertex();
            assert(pStartVertex != NULL && pLastVertex != NULL);

            std::string full = variantWalks[i].getString(SGWT_START_TO_END);
            int posStart = 0;
            int posEnd = 0;

            if(dir == ED_ANTISENSE)
            {
                // pLast   -----------
                // pStart          ------------
                // full    --------------------
                // out             ----
                posStart = static_cast<int>(pLastVertex->getSeqLen() - minOverlapY);
                posEnd = static_cast<int>(full.size() - (pStartVertex->getSeqLen() - minOverlapX));
            }
            else
            {
                // pStart         --------------
                // pLast   -----------
                // full    ---------------------
                // out            ----
                posStart = static_cast<int>(pStartVertex->getSeqLen() - minOverlapX); // match start position
                posEnd = static_cast<int>(full.size() - (pLastVertex->getSeqLen() - minOverlapY)); // match end position
            }

            // An empty string is stored when the two end vertices leave no internal region
            std::string out;
            if(posEnd > posStart)
                out = full.substr(posStart, posEnd - posStart);
            walkStrings.push_back(out);
        }

        assert(selectedIdx != static_cast<size_t>(-1));
        SGWalk& selectedWalk = variantWalks[selectedIdx];
        assert(selectedWalk.isIndexed());

        // Check the divergence of the other walks to this walk.
        // The entries at selectedIdx are left at their default values.
        StringVector cigarStrings;
        cigarStrings.resize(numWalks);
        std::vector<int> maxIndel(numWalks);
        std::vector<double> gapPercent(numWalks); // percentage of matching that is gaps
        std::vector<double> totalPercent(numWalks); // percent of total alignment that is mismatch or gap

        for(size_t i = 0; i < numWalks; ++i)
        {
            if(i == selectedIdx)
                continue;

            // We want to compute the total gap length, total mismatches and percent
            // divergence between the two paths.
            int matchLen = 0;
            int totalDiff = 0;
            int gapLength = 0;
            int maxGapLength = 0;

            // We have to handle the degenerate case where one internal string has zero length
            // this can happen when there is an isolated insertion/deletion and the walks are like:
            // x -> y -> z
            // x -> z
            if(walkStrings[selectedIdx].empty() || walkStrings[i].empty())
            {
                matchLen = static_cast<int>(std::max(walkStrings[selectedIdx].size(), walkStrings[i].size()));
                totalDiff = matchLen;
                gapLength = matchLen;
            }
            else
            {
                AlnAln* aln_global = aln_stdaln(walkStrings[selectedIdx].c_str(), walkStrings[i].c_str(), &aln_param_blast, 1, 1);

                // Walk the outm string of the alignment: its length is the alignment
                // length and every ' ' in it is counted as a difference
                while(aln_global->outm[matchLen] != '\0')
                {
                    if(aln_global->outm[matchLen] == ' ')
                        totalDiff += 1;
                    matchLen += 1;
                }

                // Each cigar32 entry holds the operation in its low 4 bits (index into "MID")
                // and the operation length in the remaining high bits
                std::stringstream cigarSS;
                for(int j = 0; j != aln_global->n_cigar; ++j)
                {
                    char cigarOp = "MID"[aln_global->cigar32[j] & 0xf];
                    int cigarLen = aln_global->cigar32[j] >> 4;
                    if(cigarOp == 'I' || cigarOp == 'D')
                    {
                        gapLength += cigarLen;

                        // Track the largest single indel. Comparing against the running
                        // total (as was done previously) made this equal to gapLength.
                        if(cigarLen > maxGapLength)
                            maxGapLength = cigarLen;
                    }

                    cigarSS << cigarLen;
                    cigarSS << cigarOp;
                }
                cigarStrings[i] = cigarSS.str();
                aln_free_AlnAln(aln_global);
            }

            // matchLen is zero when both internal strings are empty; treat the
            // strings as identical rather than dividing by zero
            double percentDiff = 0.0;
            double percentGap = 0.0;
            if(matchLen > 0)
            {
                percentDiff = static_cast<double>(totalDiff) / matchLen;
                percentGap = static_cast<double>(gapLength) / matchLen;
            }

            if(percentDiff > m_maxTotalDivergence)
                bFailDivergenceCheck = true;

            if(percentGap > m_maxGapDivergence)
                bFailGapCheck = true;

            if(maxGapLength > m_maxIndelLength)
                bFailIndelSizeCheck = true;

            gapPercent[i] = percentGap;
            totalPercent[i] = percentDiff;
            maxIndel[i] = maxGapLength;
        }

        // A single failing walk rejects the whole set for this direction
        if(bIsDegenerate || bFailGapCheck || bFailDivergenceCheck || bFailIndelSizeCheck)
            continue;

        // Write the selected path to the variants file as variant 0
        int variantIdx = 0;
        std::string selectedSequence = selectedWalk.getString(SGWT_START_TO_END);
        std::stringstream selectedName;
        selectedName << "variant-" << m_numRemovedTotal << "/" << variantIdx++;
        writeFastaRecord(&m_outFile, selectedName.str(), selectedSequence);

        // The vertex set for each walk is not necessarily disjoint,
        // the selected walk may contain vertices that are part
        // of other paths. We handle this by marking the internal vertices
        // of each non-selected walk red only when they are not also
        // on the selected walk.
        for(size_t i = 0; i < numWalks; ++i)
        {
            if(i == selectedIdx)
                continue;

            SGWalk& currWalk = variantWalks[i];

            // The end of the last edge is skipped, so only the endpoints of the
            // earlier edges are candidates for marking
            const size_t numEdges = currWalk.getNumEdges();
            for(size_t j = 0; j + 1 < numEdges; ++j)
            {
                // If the vertex is also on the selected path, do not mark it
                Vertex* currVertex = currWalk.getEdge(j)->getEnd();
                if(!selectedWalk.containsVertex(currVertex->getID()))
                    currVertex->setColor(GC_RED);
            }

            // Write the variant to a file, annotated with its divergence statistics
            std::string variantSequence = currWalk.getString(SGWT_START_TO_END);
            std::stringstream variantName;
            variantName << "variant-" << m_numRemovedTotal << "/" << variantIdx++;
            variantName << " IGD:" << gapPercent[i] << " ITD:" << totalPercent[i] << " MID: " << maxIndel[i] << " InternalCigar:" << cigarStrings[i];
            writeFastaRecord(&m_outFile, variantName.str(), variantSequence);
        }

        if(numWalks == 2)
            m_simpleBubblesRemoved += 1;
        else
            m_complexBubblesRemoved += 1;
        ++m_numRemovedTotal;
    }

    // NOTE(review): the reviewed text ended before the function's return statement;
    // found is the only result computed above - confirm against the full source.
    return found;
}
Exemplo n.º 26
0
// Return the number of edges that getEdges reports for direction dir
size_t Vertex::countEdges(EdgeDir dir)
{
    return getEdges(dir).size();
}
Exemplo n.º 27
0
//
// SGPETrustVisitor - determines which edges in the 
// string graph are "trusted" - the reads overlapping
// in the edge have pairs that also overlap
//
// NOTE: the whole implementation below is compiled out with #if 0, so this
// visitor currently does nothing and always returns false. The disabled code
// refers to pVertex, whose name is commented out in the signature, so the
// parameter name must be restored before the code can be re-enabled.
//
bool SGPETrustVisitor::visit(StringGraph* /*pGraph*/, Vertex* /*pVertex*/)
{
#if 0
    // A vertex without a pair vertex cannot be used to establish trust
    Vertex* pPairVertex = pVertex->getPairVertex();
    if(pPairVertex == NULL)
        return false;

    // First, mark all pair vertices that overlap the pair of this node
    // The set of marked vertices that overlap pVertex are the trusted vertices
    EdgePtrVec pairEdgeVec = pPairVertex->getEdges();
    for(size_t i = 0; i < pairEdgeVec.size(); ++i)
    {
        // Get the pair of the endpoint of this edge
        Vertex* pBackVertex = pairEdgeVec[i]->getEnd()->getPairVertex();
        if(pBackVertex != NULL)
            pBackVertex->setColor(GC_RED);
    }

    EdgePtrVec vertEdgeVec = pVertex->getEdges();
    
    // Repeat until a full pass over the neighbors colors no new vertex red
    bool changed = true;
    while(changed)
    {
        changed = false;
        // Propagate trust
        for(size_t i = 0; i < vertEdgeVec.size(); ++i)
        {
            Vertex* pCurr = vertEdgeVec[i]->getEnd();
            if(pCurr->getColor() != GC_RED)
            {
                // If any vertex that pCurr overlaps with is red, mark it red too
                EdgePtrVec currEdgeVec = pCurr->getEdges();
                for(size_t j = 0; j < currEdgeVec.size(); ++j)
                {
                    if(currEdgeVec[j]->getEnd()->getColor() == GC_RED)
                    {
                        pCurr->setColor(GC_RED);
                        changed = true;
                        break;
                    }
                }
            }
        }
    }

    // Flag every edge of pVertex whose endpoint ended up red as trusted.
    // The counters are only consumed by the commented-out printf below.
    int trusted = 0;
    int nottrusted = 0;
    int diffstrand = 0;
    for(size_t i = 0; i < vertEdgeVec.size(); ++i)
    {
        if(vertEdgeVec[i]->getEnd()->getColor() == GC_RED)
        {
            trusted++;
            vertEdgeVec[i]->isTrusted = true;
        }
        else
        {
            nottrusted++;
        }
    }

    // diffstrand is never updated; the cast to void keeps it referenced
    (void)diffstrand;
    //printf("TOKEN\t%d\t%d\t%d\t%zu\n", trusted, nottrusted, diffstrand, vertEdgeVec.size());

    // Reset all the vertex colors
    for(size_t i = 0; i < pairEdgeVec.size(); ++i)
    {
        // Get the pair of the endpoint of this edge
        Vertex* pBackVertex = pairEdgeVec[i]->getEnd()->getPairVertex();
        if(pBackVertex)
            pBackVertex->setColor(GC_WHITE);
    }

    // Also reset the direct neighbors that were colored during propagation
    for(size_t i = 0; i < vertEdgeVec.size(); ++i)
        vertEdgeVec[i]->getEnd()->setColor(GC_WHITE);
#endif
    return false;
}