// // High level merge function that does not specify an edge // void Bigraph::mergeVertices(VertexID id1, VertexID id2) { Vertex* pVert1 = getVertex(id1); // Get the edges from vertex1 to vertex2 EdgePtrVec edgesTo = pVert1->findEdgesTo(id2); if(edgesTo.empty()) { std::cerr << "mergeVertices: vertices are not connected\n"; return; } if(edgesTo.size() > 1) { std::cerr << "mergeVertces: cannot merge because of ambigious edges\n"; return; } // There is a single unique edge between the vertices Edge* mergeEdge = *edgesTo.begin(); // Call the real merging function merge(pVert1, mergeEdge); }
// Colour a contained vertex GC_BLACK when one of its neighbours, joined by a
// containment overlap whose contained index is 0, has exactly the same
// sequence. Each vertex coloured this way increments `count`.
// Always returns false.
bool SGIdenticalRemoveVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
{
    if(pVertex->isContained())
    {
        EdgePtrVec candidates = pVertex->getEdges();
        const size_t numCandidates = candidates.size();
        for(size_t n = 0; n < numCandidates; ++n)
        {
            Edge* pCandidate = candidates[n];
            Vertex* pNeighbor = pCandidate->getEnd();

            // Sequences of different length can never be identical
            if(pVertex->getSeqLen() != pNeighbor->getSeqLen())
                continue;

            Overlap containment = pCandidate->getOverlap();
            if(!(containment.isContainment() && containment.getContainedIdx() == 0))
                continue;

            if(pVertex->getSeq() == pNeighbor->getSeq())
            {
                // One identical neighbour is enough; stop searching
                pVertex->setColor(GC_BLACK);
                ++count;
                break;
            }
        }
    }
    return false;
}
// Simplify the graph by compacting edges in the given direction void Bigraph::simplify(EdgeDir dir) { bool graph_changed = true; while(graph_changed) { graph_changed = false; VertexPtrMapIter iter = m_vertices.begin(); while(iter != m_vertices.end()) { // Get the edges for this direction EdgePtrVec edges = iter->second->getEdges(dir); // If there is a single edge in this direction, merge the vertices // Don't merge singular self edges though if(edges.size() == 1 && !edges.front()->isSelf()) { // Check that the edge back is singular as well Edge* pSingle = edges.front(); Edge* pTwin = pSingle->getTwin(); Vertex* pV2 = pSingle->getEnd(); if(pV2->countEdges(pTwin->getDir()) == 1) { merge(iter->second, pSingle); graph_changed = true; } } ++iter; } } }
// // Flip a vertex // void Bigraph::flip(VertexID /*id*/) { assert(false); #if 0 // TODO: update this code Vertex* pVertex = getVertex(id); EdgePtrVec edges = pVertex->getEdges(); for(EdgePtrVecIter iter = edges.begin(); iter != edges.end(); ++iter) { // Get the old twin GraphEdgeType twin = iter->getTwin(); GraphEdgeType flipped = *iter; flipped.flip(); // Remove the edge from the source ver pVertex->removeEdge(*iter); pVertex->addEdge(flipped); // Update the partner by deleting the old twin and Vertex* pV2 = getVertex(twin.getStart()); pV2->removeEdge(twin); pV2->addEdge(flipped.getTwin()); } #endif }
// Mark any nodes that either dont have edges or edges in only one direction for removal bool SGChimericVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex) { // Check if this node is chimeric if (pVertex->countEdges(ED_SENSE) == 1 && pVertex->countEdges(ED_ANTISENSE) == 1 && pVertex->getSeqLen() <= m_minLength) { Edge* pPrevEdge = pVertex->getEdges(ED_ANTISENSE)[0]; Edge* pNextEdge = pVertex->getEdges(ED_SENSE)[0]; Vertex* pPrevVertex = pPrevEdge->getEnd(); Vertex* pNextVertex = pNextEdge->getEnd(); bool chimeric = true; if (chimeric) chimeric &= (pPrevVertex->countEdges(ED_SENSE) >= 2); //chimeric &= (pPrevVertex->countEdges(ED_SENSE) == 2 && pPrevVertex->countEdges(ED_ANTISENSE) == 1); if (chimeric) chimeric &= (pNextVertex->countEdges(ED_ANTISENSE) >= 2); //chimeric &= (pNextVertex->countEdges(ED_SENSE) == 1 && pNextVertex->countEdges(ED_ANTISENSE) == 2); if (chimeric) { // smallest? bool smallest = false; { EdgePtrVec edges = pPrevVertex->getEdges(ED_SENSE); for(size_t k = 0; k < edges.size(); ++k) { if (edges[k]->getMatchLength() > pPrevEdge->getMatchLength() && edges[k]->getMatchLength() - pPrevEdge->getMatchLength() >= _delta) { smallest = true; } } } { EdgePtrVec edges = pNextVertex->getEdges(ED_ANTISENSE); for(size_t k = 0; k < edges.size(); ++k) { if (edges[k]->getMatchLength() > pNextEdge->getMatchLength() && edges[k]->getMatchLength() - pNextEdge->getMatchLength() >= _delta) { smallest = true; } } } chimeric &= smallest; } if (chimeric) { //bool smallest = false; //chimeric &= smallest; } if (chimeric) { //std::cout << "chimeric\t" << pVertex->getID() << "\t" << _delta << "\t" << pVertex->getSeq() << "\n"; pVertex->setColor(GC_BLACK); ++num_chimeric; return true; } } return false; }
float WingedUtil :: averageLength (const WingedMesh& mesh, const EdgePtrVec& edges) { assert (edges.size () > 0); float l = 0.0f; for (const WingedEdge* e : edges) { l += e->length (mesh); } return l / float (edges.size ()); }
// // Get the edges in a particular direction // This preserves the ordering of the edges // EdgePtrVec Vertex::getEdges(EdgeDir dir) const { EdgePtrVecConstIter iter = m_edges.begin(); EdgePtrVec outEdges; for(; iter != m_edges.end(); ++iter) { if((*iter)->getDir() == dir) outEdges.push_back(*iter); } return outEdges; }
// Find edges to the specified vertex EdgePtrVec Vertex::findEdgesTo(VertexID id) { EdgePtrVecConstIter iter = m_edges.begin(); EdgePtrVec outEdges; for(; iter != m_edges.end(); ++iter) { if((*iter)->getEndID() == id) outEdges.push_back(*iter); } return outEdges; }
// Gather pointers to the edges adjacent to the faces in this->faces.
// An edge is taken when the face being visited is its left face, or when the
// face on its other side is not part of this->faces.
EdgePtrVec toEdgeVec () const {
  EdgePtrVec collected;

  for (WingedFace* face : this->faces) {
    for (WingedEdge& edge : face->adjacentEdges ()) {
      // Skip the edge when seen from its non-left face while the other face is in the set
      if (! edge.isLeftFace (*face) && this->faces.count (edge.otherFace (*face)) != 0) {
        continue;
      }
      collected.push_back (&edge);
    }
  }
  return collected;
}
// // SGOverlapWriterVisitor - write all the overlaps in the graph to a file // bool SGOverlapWriterVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex) { EdgePtrVec edges = pVertex->getEdges(); for(size_t i = 0; i < edges.size(); ++i) { Overlap ovr = edges[i]->getOverlap(); if(ovr.id[0] < ovr.id[1]) m_fileHandle << ovr << "\n"; } return false; }
// // Merge two vertices along the specified edge // void Bigraph::merge(Vertex* pV1, Edge* pEdge) { Vertex* pV2 = pEdge->getEnd(); //std::cout << "Merging " << pV1->getID() << " with " << pV2->getID() << "\n"; // Merge the data pV1->merge(pEdge); // Get the twin edge (the edge in v2 that points to v1) Edge* pTwin = pEdge->getTwin(); // Ensure v2 has the twin edge assert(pV2->hasEdge(pTwin)); // Get the edge set opposite of the twin edge (which will be the new edges in this direction for V1) EdgePtrVec transEdges = pV2->getEdges(!pTwin->getDir()); // Move the edges from pV2 to pV1 for(EdgePtrVecIter iter = transEdges.begin(); iter != transEdges.end(); ++iter) { Edge* pTransEdge = *iter; // Remove the edge from V2, this does not destroy the edge pV2->removeEdge(pTransEdge); // Join pEdge to the start of transEdge // This updates the starting point of pTransEdge to be V1 // This calls Edge::extend on the twin edge pTransEdge->join(pEdge); assert(pTransEdge->getDir() == pEdge->getDir()); pV1->addEdge(pTransEdge); // add to V1 // Notify the edges they have been updated pTransEdge->update(); pTransEdge->getTwin()->update(); } // Remove the edge from pV1 to pV2 pV1->removeEdge(pEdge); delete pEdge; pEdge = 0; // Remove the edge from pV2 to pV1 pV2->removeEdge(pTwin); delete pTwin; pEdge = 0; // Remove V2 // It is guarenteed to not be connected removeIslandVertex(pV2); //validate(); }
// If pVertex is flagged as contained, clear that flag and instead flag the
// end vertex of every containment edge leaving pVertex.
// Vertices that are not flagged as contained are left untouched.
void StringGraphGenerator::resetContainmentFlags(Vertex* pVertex)
{
    if(pVertex->isContained())
    {
        pVertex->setContained(false);

        // Pass the flag on to every vertex joined by a containment edge
        EdgePtrVec links = pVertex->getEdges();
        const size_t numLinks = links.size();
        for(size_t n = 0; n < numLinks; ++n)
        {
            Edge* pLink = links[n];
            if(!pLink->getOverlap().isContainment())
                continue;
            pLink->getEnd()->setContained(true);
        }
    }
}
// For each direction, compare every edge of the vertex against the first edge
// returned for that direction. An edge whose match length differs from the
// first edge's is kept only if the vertex at its end has an edge (after
// discarding those accepted by the per-direction filter) leading back to
// pVertex and the match-length gap to the first edge is at most _delta.
// Otherwise the edge and its twin are coloured GC_BLACK.
// Returns true when at least one edge was coloured.
bool SGMaximalOverlapVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
{
    typedef bool(*EdgeFilter)(const Edge*);
    EdgeFilter discardFilter[ED_COUNT] = {SGMaximalOverlapVisitor::isSenseEdge, SGMaximalOverlapVisitor::isAntiSenseEdge};

    bool anyMarked = false;
    for(size_t dirIdx = 0; dirIdx < ED_COUNT; dirIdx++)
    {
        EdgeDir dir = EDGE_DIRECTIONS[dirIdx];

        // These edges are already sorted by overlap length
        EdgePtrVec dirEdges = pVertex->getEdges(dir);
        if(dirEdges.empty())
            continue;

        for(size_t i = 1; i < dirEdges.size(); ++i)
        {
            // Ties with the reference edge are always kept
            if(dirEdges[i]->getMatchLength() == dirEdges[0]->getMatchLength())
                continue;

            // Edges of the neighbour, minus those accepted by the filter for this direction
            EdgePtrVec backEdges = dirEdges[i]->getEnd()->getEdges();
            backEdges.erase(std::remove_if(backEdges.begin(), backEdges.end(), discardFilter[dirIdx]),
                            backEdges.end());
            assert(!backEdges.empty());

            bool keep = false;
            for(size_t j = 0; j < backEdges.size(); ++j)
            {
                if(backEdges[j]->getEndID() == pVertex->getID() &&
                   dirEdges[0]->getMatchLength() - dirEdges[i]->getMatchLength() <= _delta)
                {
                    keep = true;
                }
            }

            if(keep)
                continue;

            dirEdges[i]->setColor(GC_BLACK);
            dirEdges[i]->getTwin()->setColor(GC_BLACK);
            anyMarked = true;
        }
    }
    return anyMarked;
}
// Rebuild the edge set of pVertex from its irreducible overlaps.
// Existing edges absent from the irreducible overlap map are coloured
// GC_BLACK together with their twins; overlaps in the map that have no
// existing edge get new edges created. The containment flags are then
// refreshed from the containment map.
// Returns true when at least one edge was created.
bool SGRemodelVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
{
    typedef SGAlgorithms::EdgeDescOverlapMap OverlapMap;

    // Construct the set of overlaps reachable within the current parameters
    CompleteOverlapSet vertexOverlapSet(pVertex, m_remodelER, pGraph->getMinOverlap());
    OverlapMap containMap;
    vertexOverlapSet.computeIrreducible(NULL, &containMap);
    OverlapMap pending = vertexOverlapSet.getOverlapMap();

    // Reconcile the existing edges with the irreducible overlaps
    EdgePtrVec existing = pVertex->getEdges();
    for(size_t n = 0; n < existing.size(); ++n)
    {
        OverlapMap::iterator hit = pending.find(existing[n]->getDesc());
        if(hit == pending.end())
        {
            // No longer irreducible: mark the edge pair
            existing[n]->setColor(GC_BLACK);
            existing[n]->getTwin()->setColor(GC_BLACK);
            //std::cout << "Marking edge for deletion: " << edges[i]->getOverlap() << "\n";
        }
        else
        {
            // Edge exists already, nothing to add for it
            pending.erase(hit);
        }
    }

    // Whatever remains in the map has no edge yet; create them
    bool createdAny = false;
    for(OverlapMap::iterator rest = pending.begin(); rest != pending.end(); ++rest)
    {
        //std::cout << "Adding overlap: " << ovr << "\n";
        SGAlgorithms::createEdgesFromOverlap(pGraph, rest->second, false);
        createdAny = true;
    }

    // Update the containment flags in the graph to ensure that we can subsequently remove containment verts
    SGAlgorithms::updateContainFlags(pGraph, pVertex, containMap);
    return createdAny;
}
// Detach a contained vertex from the graph and colour it GC_BLACK.
// When the graph has no transitive edges and is not in exact mode, each
// neighbour is first remodelled (in EdgeLenComp order) so that edges made
// irreducible by the removal are added. Then every edge of pVertex and its
// twin are deleted. Vertices not flagged as contained are ignored.
// Always returns false.
bool SGContainRemoveVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
{
    if(pVertex->isContained())
    {
        //cout << pVertex->getID() << endl; // debug
        EdgePtrVec links = pVertex->getEdges();

        // If the graph has been transitively reduced, we have to check all
        // the neighbors to see if any new edges need to be added. If the graph is a
        // complete overlap graph we can just remove the edges to the deletion vertex
        const bool needsRemodel = !pGraph->hasTransitive() && !pGraph->isExactMode();
        if(needsRemodel)
        {
            // This must be done in order of edge length or some transitive edges
            // may be created
            EdgeLenComp byLength;
            std::sort(links.begin(), links.end(), byLength);

            for(size_t n = 0; n < links.size(); ++n)
            {
                Vertex* pNeighbor = links[n]->getEnd();
                Edge* pBackEdge = links[n]->getTwin();
                SGAlgorithms::remodelVertexForExcision(pGraph, pNeighbor, pBackEdge);
            }
        }

        // Delete the edges from the graph, twin first
        for(size_t n = 0; n < links.size(); ++n)
        {
            Vertex* pNeighbor = links[n]->getEnd();
            Edge* pBackEdge = links[n]->getTwin();
            pNeighbor->deleteEdge(pBackEdge);
            pVertex->deleteEdge(links[n]);
        }

        pVertex->setColor(GC_BLACK);
    }
    return false;
}
// Construct the walk structure from a vector of edges SGWalk::SGWalk(const EdgePtrVec& edgeVec, bool bIndexWalk) : m_extensionDistance(0), m_extensionFinished(false) { assert(!edgeVec.empty()); if(bIndexWalk) m_pWalkIndex = new WalkIndex; else m_pWalkIndex = NULL; // The start vector is the start vertex of the first edge Edge* first = edgeVec.front(); m_pStartVertex = first->getStart(); for(EdgePtrVec::const_iterator iter = edgeVec.begin(); iter != edgeVec.end(); ++iter) { addEdge(*iter); } }
// // SGPairedOverlapVisitor - print a formatted report to stdout // detailing how much overlap there is between both end of a paired // read. // bool SGPairedOverlapVisitor::visit(StringGraph* /*pGraph*/, Vertex* /*pVertex*/) { #if 0 Vertex* pPairSV = pVertex->getPairVertex(); if(pPairSV == NULL) return false; EdgePtrVec edges = pVertex->getEdges(); // Determine which vertices that are paired to pVertex // have a pair that overlaps with pPairVertex for(size_t i = 0; i < edges.size(); ++i) { Edge* pVWEdge = edges[i]; Vertex* pW = pVWEdge->getEnd(); Vertex* pPairW = pW->getPairVertex(); if(pPairW == NULL) continue; EdgePtrVec ppw_edges = pPairW->findEdgesTo(pPairSV->getID()); size_t overlap_len = pVWEdge->getMatchLength(); if(pVWEdge->getComp() == EC_SAME) { if(ppw_edges.size() == 1) { Edge* pPPEdge = ppw_edges.front(); size_t pair_overlap_len = pPPEdge->getMatchLength(); printf("pairoverlap\t%s\t%s\t%zu\t%zu\n", pVertex->getID().c_str(), pW->getID().c_str(), overlap_len, pair_overlap_len); } else { printf("pairoverlap\t%s\t%s\t%zu\t%d\n", pVertex->getID().c_str(), pW->getID().c_str(), overlap_len, 0); } } } #endif return false; }
// For each direction with more than one edge: if at least one edge is
// trusted, colour every untrusted edge and its twin GC_BLACK. While doing so,
// every edge of such a direction is tallied in num_same (EC_SAME) or num_diff.
// Directions without a trusted edge are left untouched. Always returns false.
bool SGPEConflictRemover::visit(StringGraph* pGraph, Vertex* pVertex)
{
    (void)pGraph;

    for(size_t dirIdx = 0; dirIdx < ED_COUNT; dirIdx++)
    {
        EdgeDir dir = EDGE_DIRECTIONS[dirIdx];
        EdgePtrVec dirEdges = pVertex->getEdges(dir);

        // A single edge (or none) cannot conflict
        if(dirEdges.size() <= 1)
            continue;

        bool anyTrusted = false;
        for(size_t j = 0; j < dirEdges.size(); ++j)
        {
            if(dirEdges[j]->isTrusted)
                anyTrusted = true;
        }
        if(!anyTrusted)
            continue;

        for(size_t j = 0; j < dirEdges.size(); ++j)
        {
            Edge* pCurrent = dirEdges[j];
            if(!pCurrent->isTrusted)
            {
                pCurrent->setColor(GC_BLACK);
                pCurrent->getTwin()->setColor(GC_BLACK);
            }

            if(pCurrent->getComp() == EC_SAME)
                num_same++;
            else
                num_diff++;
        }
    }
    return false;
}
// Accumulate overlap statistics for one vertex.
// Every existing edge contributes (overlap length, number of differences) to
// foundCounts. Candidate overlaps returned by getMissingCandidates whose
// error rate is below the graph's error rate contribute to missingCounts.
// A progress line is printed every 50000 visited vertices.
// Always returns false.
bool SGEdgeStatsVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
{
    const int MIN_OVERLAP = pGraph->getMinOverlap();
    const double MAX_ERROR = pGraph->getErrorRate();

    // Progress counter shared by all calls
    static int visited = 0;
    ++visited;
    if(visited % 50000 == 0)
        std::cout << "visited: " << visited << "\n";

    // Add stats for the found overlaps
    EdgePtrVec present = pVertex->getEdges();
    for(size_t n = 0; n < present.size(); ++n)
    {
        Edge* pEdge = present[n];
        Overlap ovr = pEdge->getOverlap();
        int numDiff = ovr.match.countDifferences(pVertex->getStr(), pEdge->getEnd()->getStr());
        int overlapLen = ovr.match.getMinOverlapLength();
        addOverlapToCount(overlapLen, numDiff, foundCounts);
    }

    // Explore the neighborhood around this vertex for potentially missing overlaps
    CandidateVector candidates = getMissingCandidates(pGraph, pVertex, MIN_OVERLAP);

    // NOTE(review): addedMO is never read in this function - confirm it can be dropped
    MultiOverlap addedMO(pVertex->getID(), pVertex->getStr());

    for(size_t n = 0; n < candidates.size(); ++n)
    {
        Candidate& cand = candidates[n];
        int numDiff = cand.ovr.match.countDifferences(pVertex->getStr(), cand.pEndpoint->getStr());
        double error_rate = double(numDiff) / double(cand.ovr.match.getMinOverlapLength());
        if(!(error_rate < MAX_ERROR))
            continue;

        int overlapLen = cand.ovr.match.getMinOverlapLength();
        addOverlapToCount(overlapLen, numDiff, missingCounts);
    }
    return false;
}
// Recursively copy the neighbourhood of pCurrVertex into pSubgraph.
// For every edge of pCurrVertex not yet coloured GC_BLACK, the end vertex is
// copied into the subgraph, edges are created there from the overlap, the
// edge and its twin are coloured GC_BLACK, and the end vertex is expanded
// with span - 1. Nothing happens when span <= 0.
void addNeighborsToSubgraph(Vertex* pCurrVertex, StringGraph* pSubgraph, int span)
{
    if(span > 0)
    {
        // These are the edges in the main graph
        EdgePtrVec mainEdges = pCurrVertex->getEdges();
        for(size_t n = 0; n < mainEdges.size(); ++n)
        {
            Edge* pLink = mainEdges[n];

            // Black edges were already handled (possibly by a deeper call)
            if(pLink->getColor() == GC_BLACK)
                continue;

            Vertex* pNeighbor = pLink->getEnd();
            copyVertexToSubgraph(pSubgraph, pNeighbor);

            Overlap ovr = pLink->getOverlap();
            SGAlgorithms::createEdgesFromOverlap(pSubgraph, ovr, true);

            pLink->setColor(GC_BLACK);
            pLink->getTwin()->setColor(GC_BLACK);

            // Recurse
            addNeighborsToSubgraph(pNeighbor, pSubgraph, span - 1);
        }
    }
}
// Explore the neighborhood around a vertex looking for missing overlaps SGEdgeStatsVisitor::CandidateVector SGEdgeStatsVisitor::getMissingCandidates(StringGraph* /*pGraph*/, Vertex* pVertex, int minOverlap) const { CandidateVector out; // Mark the vertices that are reached from this vertex as black to indicate // they already are overlapping EdgePtrVec edges = pVertex->getEdges(); for(size_t i = 0; i < edges.size(); ++i) { edges[i]->getEnd()->setColor(GC_BLACK); } pVertex->setColor(GC_BLACK); for(size_t i = 0; i < edges.size(); ++i) { Edge* pXY = edges[i]; EdgePtrVec neighborEdges = pXY->getEnd()->getEdges(); for(size_t j = 0; j < neighborEdges.size(); ++j) { Edge* pYZ = neighborEdges[j]; if(pYZ->getEnd()->getColor() != GC_BLACK) { // Infer the overlap object from the edges Overlap ovrXY = pXY->getOverlap(); Overlap ovrYZ = pYZ->getOverlap(); if(SGAlgorithms::hasTransitiveOverlap(ovrXY, ovrYZ)) { Overlap ovr_xz = SGAlgorithms::inferTransitiveOverlap(ovrXY, ovrYZ); if(ovr_xz.match.getMinOverlapLength() >= minOverlap) { out.push_back(Candidate(pYZ->getEnd(), ovr_xz)); pYZ->getEnd()->setColor(GC_BLACK); } } } } } // Reset colors for(size_t i = 0; i < edges.size(); ++i) edges[i]->getEnd()->setColor(GC_WHITE); pVertex->setColor(GC_WHITE); for(size_t i = 0; i < out.size(); ++i) out[i].pEndpoint->setColor(GC_WHITE); return out; }
// Find bubbles (nodes where there is a split and then immediate rejoin) and mark them for removal
//
// For each direction in which pVertex has more than one edge, three passes
// are made over those edges:
//   1. Check:  bail out (return false) if any neighbour, or the single vertex
//              following a neighbour, is already GC_RED.
//   2. Mark:   for neighbours with exactly one onward edge, colour the onward
//              vertex GC_BLACK the first time it is seen; a neighbour whose
//              onward vertex is already GC_BLACK is coloured GC_RED.
//   3. Unmark: restore onward vertices and GC_BLUE neighbours to GC_WHITE.
// GC_RED neighbours stay red when the function returns.
// Returns true when at least one neighbour was coloured GC_RED.
bool SGBubbleVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
{
    bool bubble_found = false;
    for(size_t idx = 0; idx < ED_COUNT; idx++)
    {
        EdgeDir dir = EDGE_DIRECTIONS[idx];
        EdgePtrVec edges = pVertex->getEdges(dir);
        if(edges.size() > 1)
        {
            // pStart/pEnd are only consumed by the commented-out report below
            Vertex* pStart = pVertex;
            Vertex* pEnd = NULL;

            // Check the vertices
            // NOTE(review): these early returns yield false even if a bubble
            // was already marked while processing a previous direction - confirm intended
            for(size_t i = 0; i < edges.size(); ++i)
            {
                Edge* pVWEdge = edges[i];
                Vertex* pWVert = pVWEdge->getEnd();

                // Get the edges from w in the same direction
                EdgeDir transDir = !pVWEdge->getTwinDir();
                EdgePtrVec wEdges = pWVert->getEdges(transDir);

                if(pWVert->getColor() == GC_RED)
                    return false;

                // If the bubble has collapsed, there should only be one edge
                if(wEdges.size() == 1)
                {
                    Vertex* pBubbleEnd = wEdges.front()->getEnd();
                    if(pBubbleEnd->getColor() == GC_RED)
                        return false;
                }
            }

            // Mark the vertices
            for(size_t i = 0; i < edges.size(); ++i)
            {
                Edge* pVWEdge = edges[i];
                Vertex* pWVert = pVWEdge->getEnd();

                // Get the edges from w in the same direction
                EdgeDir transDir = !pVWEdge->getTwinDir();
                EdgePtrVec wEdges = pWVert->getEdges(transDir);

                // If the bubble has collapsed, there should only be one edge
                if(wEdges.size() == 1)
                {
                    Vertex* pBubbleEnd = wEdges.front()->getEnd();
                    if(pBubbleEnd->getColor() == GC_BLACK)
                    {
                        // The endpoint has been visited, set this vertex as needing removal
                        // and set the endpoint as unvisited
                        pWVert->setColor(GC_RED);
                        bubble_found = true;
                        pEnd = pBubbleEnd;
                    }
                    else
                    {
                        // First path to reach this endpoint: remember it and
                        // tag the neighbour so it can be reset afterwards
                        pBubbleEnd->setColor(GC_BLACK);
                        pWVert->setColor(GC_BLUE);
                    }
                }
            }

            // Unmark vertices
            for(size_t i = 0; i < edges.size(); ++i)
            {
                Edge* pVWEdge = edges[i];
                Vertex* pWVert = pVWEdge->getEnd();

                // Get the edges from w in the same direction
                EdgeDir transDir = !pVWEdge->getTwinDir();
                EdgePtrVec wEdges = pWVert->getEdges(transDir);

                // If the bubble has collapsed, there should only be one edge
                if(wEdges.size() == 1)
                {
                    Vertex* pBubbleEnd = wEdges.front()->getEnd();
                    pBubbleEnd->setColor(GC_WHITE);
                }

                // Only the temporary GC_BLUE tag is cleared; GC_RED is kept
                if(pWVert->getColor() == GC_BLUE)
                    pWVert->setColor(GC_WHITE);
            }

            // Silence unused-variable warnings while the report is disabled
            (void)pStart;
            (void)pEnd;

            // NOTE(review): bubble_found is not reset per direction, so a bubble
            // found in the first direction is counted again if the second
            // direction also has more than one edge - confirm intended
            if(bubble_found)
            {
                /*
                SGWalkVector walkVector;
                SGSearch::findWalks(pStart, pEnd, dir, 1000, 20, walkVector);

                if(walkVector.size() == 2)
                {
                    SGWalk& walk1 = walkVector[0];
                    SGWalk& walk2 = walkVector[1];

                    int len1 = walk1.getStartToEndDistance();
                    int len2 = walk2.getStartToEndDistance();
                    int diff = len1 - len2;
                    std::string type = "SNP";
                    if(diff != 0)
                    {
                        type = "INDEL";
                    }
                    std::cout << "Bubble " << pStart->getID() << " to " << pEnd->getID() << " is a "
                              << type << "(d: " << diff << ")\n";
                }
                */
                ++num_bubbles;
            }
        }
    }
    return bubble_found;
}
// Find bubbles (nodes where there is a split and then immediate rejoin) and mark them for removal
//
// Only directions in which pX has exactly two edges are examined. The edge
// with the longer overlap is called XY, the other XZ. The neighbours of Z (in
// XZ's transitive direction) form a target list; starting from Y, vertices
// are explored through inferred transitive overlaps (at most 100 queue pops)
// and removed from the target list when reached. If the target list ends up
// empty, all edges of Z are deleted, Z is coloured GC_RED and num_bubbles is
// incremented.
// Returns true when at least one bubble was found.
bool SGBubbleEdgeVisitor::visit(StringGraph* /*pGraph*/, Vertex* pX)
{
    bool bubble_found = false;
    for(size_t idx = 0; idx < ED_COUNT; idx++)
    {
        EdgeDir dir = EDGE_DIRECTIONS[idx];
        EdgePtrVec edges = pX->getEdges(dir);
        if(edges.size() == 2) // di-bubbles only for now
        {
            // Determine which edge has a shorter overlap to pX
            // Call the longer overlap pY, the shorter pZ
            Edge* pXY;
            Edge* pXZ;
            if(edges[0]->getOverlap().getOverlapLength(0) > edges[1]->getOverlap().getOverlapLength(0))
            {
                pXY = edges[0];
                pXZ = edges[1];
            }
            else if(edges[1]->getOverlap().getOverlapLength(0) > edges[0]->getOverlap().getOverlapLength(0))
            {
                pXY = edges[1];
                pXZ = edges[0];
            }
            else
            {
                // NOTE(review): break leaves the direction loop entirely, so the
                // other direction is never examined - confirm `continue` was not intended
                break; // equal length overlaps, cannot be a bubble or else the vertices would be contained
            }

            // Mark the neighbors of pZ as the "target" vertices
            // if they can be reached by pY we mark pY as being unreliable and remove it
            // NOTE(review): the code below actually deletes the edges of the end
            // of pXZ (Z), not Y - the comment above may be stale
            typedef std::list<Vertex*> VertexPtrList;
            VertexPtrList targetList;

            EdgeDir targetDir = pXZ->getTransitiveDir();
            EdgePtrVec targetEdges = pXZ->getEnd()->getEdges(targetDir);
            for(size_t i = 0; i < targetEdges.size(); ++i)
                targetList.push_back(targetEdges[i]->getEnd());

            // Start exploring from pY
            ExploreQueue queue;
            Overlap ovrXY = pXY->getOverlap();
            EdgeDesc edXY = pXY->getDesc();
            queue.push(ExploreElement(edXY, ovrXY));

            // Hard upper bound on the number of queue elements processed
            int numSteps = 100;
            WARN_ONCE("USING FIXED NUMBER OF STEPS IN BUBBLE EDGE");
            while(!queue.empty() && numSteps-- > 0)
            {
                // ee is a copy of the front element, so the references below
                // stay valid after pop(). edXY/ovrXY shadow the outer variables
                // and describe the vertex currently being expanded.
                ExploreElement ee = queue.front();
                EdgeDesc& edXY = ee.ed;
                Vertex* pY = edXY.pVertex;
                Overlap& ovrXY = ee.ovr;
                queue.pop();

                // Check if Y is on the target list
                VertexPtrList::iterator iter = targetList.begin();
                while(iter != targetList.end())
                {
                    // Post-increment keeps the iterator valid across the erase
                    if(*iter == edXY.pVertex)
                        targetList.erase(iter++);
                    else
                        ++iter;
                }

                // All targets reached: stop exploring
                if(targetList.empty())
                    break;

                // Enqueue the neighbors of pY
                EdgeDir dirY = edXY.getTransitiveDir();
                EdgePtrVec edges = pY->getEdges(dirY); // shadows the outer `edges`
                for(size_t i = 0; i < edges.size(); ++i)
                {
                    Edge* pEdge = edges[i];
                    Vertex* pZ = pEdge->getEnd();

                    // Compute the edgeDesc and overlap on pX for this edge
                    Overlap ovrYZ = pEdge->getOverlap();
                    if(SGAlgorithms::hasTransitiveOverlap(ovrXY, ovrYZ))
                    {
                        Overlap ovrXZ = SGAlgorithms::inferTransitiveOverlap(ovrXY, ovrYZ);
                        EdgeDesc edXZ = SGAlgorithms::overlapToEdgeDesc(pZ, ovrXZ);
                        queue.push(ExploreElement(edXZ, ovrXZ));
                    }
                }
            }

            // NOTE(review): if Z has no edges in targetDir the list starts out
            // empty and this branch is taken regardless of Y - confirm intended
            if(targetList.empty())
            {
                // bubble found
                pXZ->getEnd()->deleteEdges();
                pXZ->getEnd()->setColor(GC_RED);
                bubble_found = true;
                ++num_bubbles;
            }
        }
    }
    return bubble_found;
}
// Mark the transitive edges of pVertex for removal.
//
// For each direction, the end vertices of all edges of pVertex are coloured
// GC_GRAY. Two stages then walk the edges of those neighbours and turn any
// GC_GRAY vertex that is reachable through a neighbour into GC_BLACK. Finally
// every edge of pVertex whose end vertex is GC_BLACK is coloured GC_BLACK
// together with its twin (marked_edges is increased by 2 for each newly
// marked pair), and all end vertices are reset to GC_WHITE.
// marked_verts is incremented when at least one pair was marked.
// Always returns false.
bool SGTransitiveReductionVisitor::visit(StringGraph* /*pGraph*/, Vertex* pVertex)
{
    size_t trans_count = 0;
    static const size_t FUZZ = 10; // see myers

    for(size_t idx = 0; idx < ED_COUNT; idx++)
    {
        EdgeDir dir = EDGE_DIRECTIONS[idx];
        EdgePtrVec edges = pVertex->getEdges(dir); // These edges are already sorted
        if(edges.size() == 0)
            continue;

        // Tag every direct neighbour as a candidate
        for(size_t i = 0; i < edges.size(); ++i)
            (edges[i])->getEnd()->setColor(GC_GRAY);

        // Upper bound for the two-edge path lengths considered in stage 1;
        // taken from the last edge, which the sorted order makes the reference
        Edge* pLongestEdge = edges.back();
        size_t longestLen = pLongestEdge->getSeqLen() + FUZZ;

        // Stage 1
        // For each still-gray neighbour W, scan W's onward edges while the
        // combined length V->W->X stays within longestLen
        for(size_t i = 0; i < edges.size(); ++i)
        {
            Edge* pVWEdge = edges[i];
            Vertex* pWVert = pVWEdge->getEnd();

            EdgeDir transDir = !pVWEdge->getTwinDir();
            if(pWVert->getColor() == GC_GRAY)
            {
                EdgePtrVec w_edges = pWVert->getEdges(transDir);
                for(size_t j = 0; j < w_edges.size(); ++j)
                {
                    Edge* pWXEdge = w_edges[j];
                    size_t trans_len = pVWEdge->getSeqLen() + pWXEdge->getSeqLen();
                    if(trans_len <= longestLen)
                    {
                        if(pWXEdge->getEnd()->getColor() == GC_GRAY)
                        {
                            // X is the endpoint of an edge of V, therefore it is transitive
                            pWXEdge->getEnd()->setColor(GC_BLACK);
                        }
                    }
                    else
                        break; // remaining edges are not examined once the bound is exceeded
                }
            }
        }

        // Stage 2
        // For every neighbour W (regardless of colour), examine its first
        // onward edge and any leading edges shorter than FUZZ
        for(size_t i = 0; i < edges.size(); ++i)
        {
            Edge* pVWEdge = edges[i];
            Vertex* pWVert = pVWEdge->getEnd();

            EdgeDir transDir = !pVWEdge->getTwinDir();
            EdgePtrVec w_edges = pWVert->getEdges(transDir);
            for(size_t j = 0; j < w_edges.size(); ++j)
            {
                Edge* pWXEdge = w_edges[j];
                size_t len = pWXEdge->getSeqLen();

                if(len < FUZZ || j == 0)
                {
                    if(pWXEdge->getEnd()->getColor() == GC_GRAY)
                    {
                        // X is the endpoint of an edge of V, therefore it is transitive
                        pWXEdge->getEnd()->setColor(GC_BLACK);
                    }
                }
                else
                {
                    break;
                }
            }
        }

        // Translate vertex colours into edge marks and reset the vertices
        for(size_t i = 0; i < edges.size(); ++i)
        {
            if(edges[i]->getEnd()->getColor() == GC_BLACK)
            {
                // Mark the edge and its twin for removal
                // (skipped when both are already marked, so they are not counted twice)
                if(edges[i]->getColor() != GC_BLACK || edges[i]->getTwin()->getColor() != GC_BLACK)
                {
                    edges[i]->setColor(GC_BLACK);
                    edges[i]->getTwin()->setColor(GC_BLACK);
                    marked_edges += 2;
                    trans_count++;
                }
            }
            edges[i]->getEnd()->setColor(GC_WHITE);
        }
    }

    if(trans_count > 0)
        ++marked_verts;

    return false;
}
// Smooth variation that branches off pVertex.
//
// For each direction with more than one edge, the variant walks starting at
// pVertex are collected. The walk with the highest internal coverage is
// selected; every other walk is compared against it (global alignment of the
// internal sequences). If no check fails, the selected walk and all variants
// are written to m_outFile and the vertices that lie only on non-selected
// walks are coloured GC_RED.
//
// NOTE(review): the end of this definition (final return statement and
// closing brace) is not visible in this chunk; the code below is left as is.
bool SGSmoothingVisitor::visit(StringGraph* pGraph, Vertex* pVertex)
{
    (void)pGraph;

    // A vertex already scheduled for removal is not a valid starting point
    if(pVertex->getColor() == GC_RED)
        return false;

    bool found = false;
    for(size_t idx = 0; idx < ED_COUNT; idx++)
    {
        EdgeDir dir = EDGE_DIRECTIONS[idx];
        EdgePtrVec edges = pVertex->getEdges(dir);
        if(edges.size() <= 1)
            continue;

        // Do not smooth next to a vertex that is already marked for removal
        for(size_t i = 0; i < edges.size(); ++i)
        {
            if(edges[i]->getEnd()->getColor() == GC_RED)
                return false;
        }

        //std::cout << "Smoothing " << pVertex->getID() << "\n";
        const int MAX_WALKS = 10;
        const int MAX_DISTANCE = 5000;
        bool bIsDegenerate = false;
        bool bFailGapCheck = false;
        bool bFailDivergenceCheck = false;
        bool bFailIndelSizeCheck = false;

        SGWalkVector variantWalks;
        SGSearch::findVariantWalks(pVertex, dir, MAX_DISTANCE, MAX_WALKS, variantWalks);

        if(variantWalks.size() > 0)
        {
            found = true;
            size_t selectedIdx = -1; // sentinel, checked by the assert further down
            size_t selectedCoverage = 0;

            // Calculate the minimum amount overlapped on the start/end vertex.
            // This is used to properly extract the sequences from walks that represent the variation.
            int minOverlapX = std::numeric_limits<int>::max();
            int minOverlapY = std::numeric_limits<int>::max();

            for(size_t i = 0; i < variantWalks.size(); ++i)
            {
                if(variantWalks[i].getNumEdges() <= 1)
                    bIsDegenerate = true;

                // Calculate the walk coverage using the internal vertices of the walk.
                // The walk with the highest coverage will be retained
                size_t walkCoverage = 0;
                for(size_t j = 1; j < variantWalks[i].getNumVertices() - 1; ++j)
                    walkCoverage += variantWalks[i].getVertex(j)->getCoverage();

                // The `selectedCoverage == 0` clause lets a later walk replace
                // an earlier one while no walk with non-zero coverage was seen
                if(walkCoverage > selectedCoverage || selectedCoverage == 0)
                {
                    selectedIdx = i;
                    selectedCoverage = walkCoverage;
                }

                Edge* pFirstEdge = variantWalks[i].getFirstEdge();
                Edge* pLastEdge = variantWalks[i].getLastEdge();

                if((int)pFirstEdge->getMatchLength() < minOverlapX)
                    minOverlapX = pFirstEdge->getMatchLength();

                if((int)pLastEdge->getTwin()->getMatchLength() < minOverlapY)
                    minOverlapY = pLastEdge->getTwin()->getMatchLength();
            }

            // Calculate the strings for each walk that represent the region of variation
            StringVector walkStrings;
            for(size_t i = 0; i < variantWalks.size(); ++i)
            {
                Vertex* pStartVertex = variantWalks[i].getStartVertex();
                Vertex* pLastVertex = variantWalks[i].getLastVertex();
                assert(pStartVertex != NULL && pLastVertex != NULL);

                std::string full = variantWalks[i].getString(SGWT_START_TO_END);
                int posStart = 0;
                int posEnd = 0;

                if(dir == ED_ANTISENSE)
                {
                    // pLast   -----------
                    // pStart          ------------
                    // full    --------------------
                    // out            ----
                    posStart = pLastVertex->getSeqLen() - minOverlapY;
                    posEnd = full.size() - (pStartVertex->getSeqLen() - minOverlapX);
                }
                else
                {
                    // pStart  --------------
                    // pLast             -----------
                    // full    ---------------------
                    // out               ----
                    posStart = pStartVertex->getSeqLen() - minOverlapX; // match start position
                    posEnd = full.size() - (pLastVertex->getSeqLen() - minOverlapY); // match end position
                }

                // An empty string is stored when the interval is empty or inverted
                std::string out;
                if(posEnd > posStart)
                    out = full.substr(posStart, posEnd - posStart);
                walkStrings.push_back(out);
            }

            assert(selectedIdx != (size_t)-1);
            SGWalk& selectedWalk = variantWalks[selectedIdx];
            assert(selectedWalk.isIndexed());

            // Check the divergence of the other walks to this walk
            StringVector cigarStrings;
            std::vector<int> maxIndel;
            std::vector<double> gapPercent; // percentage of matching that is gaps
            std::vector<double> totalPercent; // percent of total alignment that is mismatch or gap

            cigarStrings.resize(variantWalks.size());
            gapPercent.resize(variantWalks.size());
            totalPercent.resize(variantWalks.size());
            maxIndel.resize(variantWalks.size());

            for(size_t i = 0; i < variantWalks.size(); ++i)
            {
                if(i == selectedIdx)
                    continue;

                // We want to compute the total gap length, total mismatches and percent
                // divergence between the two paths.
                int matchLen = 0;
                int totalDiff = 0;
                int gapLength = 0;
                int maxGapLength = 0;

                // We have to handle the degenerate case where one internal string has zero length
                // this can happen when there is an isolated insertion/deletion and the walks are like:
                // x -> y -> z
                // x -> z
                if(walkStrings[selectedIdx].empty() || walkStrings[i].empty())
                {
                    // NOTE(review): maxGapLength stays 0 here although the whole
                    // region is counted as gap - confirm the indel-size check
                    // is meant to be skipped in this case
                    matchLen = std::max(walkStrings[selectedIdx].size(), walkStrings[i].size());
                    totalDiff = matchLen;
                    gapLength = matchLen;
                }
                else
                {
                    AlnAln *aln_global;
                    aln_global = aln_stdaln(walkStrings[selectedIdx].c_str(), walkStrings[i].c_str(), &aln_param_blast, 1, 1);

                    // Calculate the alignment parameters
                    // (outm is scanned up to its terminator; a ' ' entry is counted as a difference)
                    while(aln_global->outm[matchLen] != '\0')
                    {
                        if(aln_global->outm[matchLen] == ' ')
                            totalDiff += 1;
                        matchLen += 1;
                    }

                    std::stringstream cigarSS;
                    for (int j = 0; j != aln_global->n_cigar; ++j)
                    {
                        // Low 4 bits select the operation, the remaining bits hold its length
                        char cigarOp = "MID"[aln_global->cigar32[j]&0xf];
                        int cigarLen = aln_global->cigar32[j]>>4;
                        if(cigarOp == 'I' || cigarOp == 'D')
                        {
                            gapLength += cigarLen;
                            // NOTE(review): this compares the cumulative gap length,
                            // so maxGapLength ends up equal to gapLength rather than
                            // the longest single indel (cigarLen) - confirm intended
                            if(gapLength > maxGapLength)
                                maxGapLength = gapLength;
                        }

                        cigarSS << cigarLen;
                        cigarSS << cigarOp;
                    }
                    cigarStrings[i] = cigarSS.str();
                    aln_free_AlnAln(aln_global);
                }

                // NOTE(review): matchLen can be 0 when both internal strings are
                // empty, making these divisions yield NaN - confirm this is acceptable
                double percentDiff = (double)totalDiff / matchLen;
                double percentGap = (double)gapLength / matchLen;

                if(percentDiff > m_maxTotalDivergence)
                    bFailDivergenceCheck = true;

                if(percentGap > m_maxGapDivergence)
                    bFailGapCheck = true;

                if(maxGapLength > m_maxIndelLength)
                    bFailIndelSizeCheck = true;

                gapPercent[i] = percentGap;
                totalPercent[i] = percentDiff;
                maxIndel[i] = maxGapLength;
            }

            // Any failed check leaves this direction untouched
            // (`found` has already been set to true above)
            if(bIsDegenerate || bFailGapCheck || bFailDivergenceCheck || bFailIndelSizeCheck)
                continue;

            // Write the selected path to the variants file as variant 0
            int variantIdx = 0;
            std::string selectedSequence = selectedWalk.getString(SGWT_START_TO_END);
            std::stringstream ss;
            ss << "variant-" << m_numRemovedTotal << "/" << variantIdx++;
            writeFastaRecord(&m_outFile, ss.str(), selectedSequence);

            // The vertex set for each walk is not necessarily disjoint,
            // the selected walk may contain vertices that are part
            // of other paths. Vertices that also lie on the selected
            // walk are therefore skipped when marking.
            for(size_t i = 0; i < variantWalks.size(); ++i)
            {
                if(i == selectedIdx)
                    continue;

                SGWalk& currWalk = variantWalks[i];

                // The last edge is excluded, so the final vertex of the walk is never marked
                for(size_t j = 0; j < currWalk.getNumEdges() - 1; ++j)
                {
                    Edge* currEdge = currWalk.getEdge(j);

                    // If the vertex is also on the selected path, do not mark it
                    Vertex* currVertex = currEdge->getEnd();
                    if(!selectedWalk.containsVertex(currVertex->getID()))
                    {
                        currEdge->getEnd()->setColor(GC_RED);
                    }
                }

                // Write the variant to a file
                std::string variantSequence = currWalk.getString(SGWT_START_TO_END);
                std::stringstream variantID; // NOTE(review): never used - confirm it can be dropped
                std::stringstream ss; // shadows the outer `ss`
                ss << "variant-" << m_numRemovedTotal << "/" << variantIdx++;
                ss << " IGD:" << (double)gapPercent[i] << " ITD:" << totalPercent[i] << " MID: " << maxIndel[i] << " InternalCigar:" << cigarStrings[i];
                writeFastaRecord(&m_outFile, ss.str(), variantSequence);
            }

            // Two walks form a simple bubble, anything else is counted as complex
            if(variantWalks.size() == 2)
                m_simpleBubblesRemoved += 1;
            else
                m_complexBubblesRemoved += 1;
            ++m_numRemovedTotal;
        }
    }
// Count the edges of this vertex that point in direction `dir`.
// The edge list is scanned in place, applying the same direction test as
// getEdges(dir), so no temporary edge vector is built just to read its size.
size_t Vertex::countEdges(EdgeDir dir)
{
    size_t numMatching = 0;
    for(EdgePtrVecConstIter iter = m_edges.begin(); iter != m_edges.end(); ++iter)
    {
        if((*iter)->getDir() == dir)
            ++numMatching;
    }
    return numMatching;
}
// // SGPETrustVisitor - determines which edges in the // string graph are "trusted" - the reads overlapping // in the edge have pairs that also overlap // bool SGPETrustVisitor::visit(StringGraph* /*pGraph*/, Vertex* /*pVertex*/) { #if 0 Vertex* pPairVertex = pVertex->getPairVertex(); if(pPairVertex == NULL) return false; // First, mark all pair vertices that overlap the pair of this node // The set of marked vertices that overlap pVertex are the trusted vertices EdgePtrVec pairEdgeVec = pPairVertex->getEdges(); for(size_t i = 0; i < pairEdgeVec.size(); ++i) { // Get the pair of the endpoint of this edge Vertex* pBackVertex = pairEdgeVec[i]->getEnd()->getPairVertex(); if(pBackVertex != NULL) pBackVertex->setColor(GC_RED); } EdgePtrVec vertEdgeVec = pVertex->getEdges(); bool changed = true; while(changed) { changed = false; // Propogate trust for(size_t i = 0; i < vertEdgeVec.size(); ++i) { Vertex* pCurr = vertEdgeVec[i]->getEnd(); if(pCurr->getColor() != GC_RED) { // If any vertex that pCurr overlaps with is red, mark it red too EdgePtrVec currEdgeVec = pCurr->getEdges(); for(size_t j = 0; j < currEdgeVec.size(); ++j) { if(currEdgeVec[j]->getEnd()->getColor() == GC_RED) { pCurr->setColor(GC_RED); changed = true; break; } } } } } // int trusted = 0; int nottrusted = 0; int diffstrand = 0; for(size_t i = 0; i < vertEdgeVec.size(); ++i) { if(vertEdgeVec[i]->getEnd()->getColor() == GC_RED) { trusted++; vertEdgeVec[i]->isTrusted = true; } else { nottrusted++; } } (void)diffstrand; //printf("TOKEN\t%d\t%d\t%d\t%zu\n", trusted, nottrusted, diffstrand, vertEdgeVec.size()); // Reset all the vertex colors for(size_t i = 0; i < pairEdgeVec.size(); ++i) { // Get the pair of the endpoint of this edge Vertex* pBackVertex = pairEdgeVec[i]->getEnd()->getPairVertex(); if(pBackVertex) pBackVertex->setColor(GC_WHITE); } for(size_t i = 0; i < vertEdgeVec.size(); ++i) vertEdgeVec[i]->getEnd()->setColor(GC_WHITE); #endif return false; }