//This function makes a double edge: in one direction for the given nodes //and the opposite direction for their reverse complements. It adds the //new edges to the vector here and to the nodes themselves. void AssemblyGraph::createDeBruijnEdge(QString node1Name, QString node2Name, int overlap) { QString node1Opposite = getOppositeNodeName(node1Name); QString node2Opposite = getOppositeNodeName(node2Name); //Quit if any of the nodes don't exist. if (!m_deBruijnGraphNodes.contains(node1Name) || !m_deBruijnGraphNodes.contains(node2Name) || !m_deBruijnGraphNodes.contains(node1Opposite) || !m_deBruijnGraphNodes.contains(node2Opposite)) return; DeBruijnNode * node1 = m_deBruijnGraphNodes[node1Name]; DeBruijnNode * node2 = m_deBruijnGraphNodes[node2Name]; DeBruijnNode * negNode1 = m_deBruijnGraphNodes[node1Opposite]; DeBruijnNode * negNode2 = m_deBruijnGraphNodes[node2Opposite]; //Quit if the edge already exists const std::vector<DeBruijnEdge *> * edges = node1->getEdgesPointer(); for (size_t i = 0; i < edges->size(); ++i) { if ((*edges)[i]->getStartingNode() == node1 && (*edges)[i]->getEndingNode() == node2) return; } //Usually, an edge has a different pair, but it is possible //for an edge to be its own pair. bool isOwnPair = (node1 == negNode2 && node2 == negNode1); DeBruijnEdge * forwardEdge = new DeBruijnEdge(node1, node2); DeBruijnEdge * backwardEdge; if (isOwnPair) backwardEdge = forwardEdge; else backwardEdge = new DeBruijnEdge(negNode2, negNode1); forwardEdge->setReverseComplement(backwardEdge); backwardEdge->setReverseComplement(forwardEdge); forwardEdge->setOverlap(overlap); backwardEdge->setOverlap(overlap); m_deBruijnGraphEdges.push_back(forwardEdge); if (!isOwnPair) m_deBruijnGraphEdges.push_back(backwardEdge); node1->addEdge(forwardEdge); node2->addEdge(forwardEdge); negNode1->addEdge(backwardEdge); negNode2->addEdge(backwardEdge); }
void AssemblyGraph::autoDetermineAllEdgesExactOverlap() { int edgeCount = m_deBruijnGraphEdges.size(); if (edgeCount == 0) return; //Determine the overlap for each edge and produce a vector //that for (size_t i = 0; i < m_deBruijnGraphEdges.size(); ++i) m_deBruijnGraphEdges[i]->autoDetermineExactOverlap(); //The expectation here is that most overlaps will be //the same or from a small subset of possible sizes. //Edges with an overlap that do not match the most common //overlap(s) are suspected of having their overlap //misidentified. They are therefore rechecked using the //common ones. std::vector<int> overlapCounts = makeOverlapCountVector(); //Sort the overlaps in order of decreasing numbers of edges. //I.e. the first overlap size in the vector will be the most //common overlap, the second will be the second most common, //etc. std::vector<int> sortedOverlaps; int overlapsSoFar = 0; double fractionOverlapsFound = 0.0; while (fractionOverlapsFound < 1.0) { int mostCommonOverlap = 0; int mostCommonOverlapCount = 0; //Find the overlap size with the most instances. for (size_t i = 0; i < overlapCounts.size(); ++i) { if (overlapCounts[i] > mostCommonOverlapCount) { mostCommonOverlap = i; mostCommonOverlapCount = overlapCounts[i]; } } //Add that overlap to the common collection and remove it from the counts. sortedOverlaps.push_back(mostCommonOverlap); overlapsSoFar += mostCommonOverlapCount; fractionOverlapsFound = double(overlapsSoFar) / edgeCount; overlapCounts[mostCommonOverlap] = 0; } //For each edge, see if one of the more common overlaps also works. //If so, use that instead. for (size_t i = 0; i < m_deBruijnGraphEdges.size(); ++i) { DeBruijnEdge * edge = m_deBruijnGraphEdges[i]; for (size_t j = 0; j < sortedOverlaps.size(); ++j) { if (edge->getOverlap() == sortedOverlaps[j]) break; else if (edge->testExactOverlap(sortedOverlaps[j])) { edge->setOverlap(sortedOverlaps[j]); break; } } } }