double AssemblyGraph::getMeanDeBruijnGraphReadDepth(bool drawnNodesOnly)
{
    int nodeCount = 0;
    long double readDepthSum = 0.0;
    long long totalLength = 0;

    QMapIterator<QString, DeBruijnNode*> i(m_deBruijnGraphNodes);
    while (i.hasNext())
    {
        i.next();
        DeBruijnNode * node = i.value();

        if (drawnNodesOnly && node->isNotDrawn())
            continue;

        ++nodeCount;
        totalLength += node->getLength();
        readDepthSum += node->getLength() * node->getReadDepth();

    }

    if (totalLength == 0)
        return 0.0;
    else
        return readDepthSum / totalLength;
}
示例#2
0
//This function recursively labels all nodes as drawn that are within a
//certain distance of this node.  Whichever node called this will
//definitely be drawn, so that one is excluded from the recursive call.
void DeBruijnNode::labelNeighbouringNodesAsDrawn(int nodeDistance, DeBruijnNode * callingNode)
{
    if (m_highestDistanceInNeighbourSearch > nodeDistance)
        return;
    m_highestDistanceInNeighbourSearch = nodeDistance;

    if (nodeDistance == 0)
        return;

    DeBruijnNode * otherNode;
    for (size_t i = 0; i < m_edges.size(); ++i)
    {
        otherNode = m_edges[i]->getOtherNode(this);

        if (otherNode == callingNode)
            continue;

        if (g_settings->doubleMode)
            otherNode->m_drawn = true;
        else //single mode
        {
            if (otherNode->isPositiveNode())
                otherNode->m_drawn = true;
            else
                otherNode->getReverseComplement()->m_drawn = true;
        }
        otherNode->labelNeighbouringNodesAsDrawn(nodeDistance-1, this);
    }
}
示例#3
0
long AssemblyJobSsDeBruijn::combineTreesMidHelp(SeqNode* topNodeA, int kmerSize,
						SeqNode* branchA, SeqNode* branchB, int remainingKmer){
  char nucs[] = {'A','T','C','G'};
  long newNodeSum = 0;
  if (remainingKmer == 1){
    for (int n = 0; n < 4; ++n){
      DeBruijnNode* newNodeB = dynamic_cast<DeBruijnNode*>(branchB->getBranch(nucs[n]));
      if (newNodeB != 0){
	DeBruijnNode* newNodeA = dynamic_cast<DeBruijnNode*>(branchA->getBranch(nucs[n]));
	if (newNodeA == 0){
	  newNodeA = new DeBruijnNode(branchA,nucs[n]);
	  branchA->addBranch( newNodeA );
	  newNodeSum += 1;
	}
	newNodeA->addKmerScore( newNodeB->getKmerScore() );
	newNodeSum += combineTreesBottomHelp(topNodeA, kmerSize, newNodeA, newNodeB);
      }
    }
  } else {
    for (int n = 0; n < 4; ++n){
      SeqNode* newNodeB = branchB->getBranch(nucs[n]);
      if (newNodeB != 0){
	SeqNode* newNodeA = branchA->getBranch(nucs[n]);
	if (newNodeA == 0){
	  newNodeA = new NucNode(branchA,nucs[n]);
	  branchA->addBranch( newNodeA );
	}
	newNodeSum += combineTreesMidHelp(topNodeA, kmerSize, newNodeA, newNodeB, remainingKmer-1);
      }
    }
  }
  return newNodeSum;
}
示例#4
0
void AssemblyJobSsDeBruijn::testTreeConstructionHelper(set<ScoredSeq*>* foundKmers, SeqNode* node, int stepsToBottom){
  if (stepsToBottom == 0){
    DeBruijnNode* dbNode = dynamic_cast<DeBruijnNode*>(node);
    foundKmers->insert( new ScoredSeqMonoScore(dbNode->getKmer(), dbNode->getKmerScore()) );
    //foundKmers->insert( ScoredSeq::getScoredSeq(dbNode->getKmer(), dbNode->getKmerScore()) );
  } else {
    char nucList[] = {'A','T','C','G'};
    for (int n = 0; n < 4; ++n){
      SeqNode* nextNode = node->getBranch( nucList[n] );
      if (nextNode != 0){ testTreeConstructionHelper( foundKmers, nextNode, stepsToBottom-1 ); }
    }
  }
}
示例#5
0
BlastQueryPath::BlastQueryPath(Path path, BlastQuery * query) :
    m_path(path), m_query(query)
{

    //This function follows the path, returning the BLAST hits it finds for the
    //query.  It requires that the hits occur in order, i.e. that each hit in
    //the path begins later in the query than the previous hit.

    BlastHit * previousHit = 0;
    QList<DeBruijnNode *> pathNodes = m_path.getNodes();
    for (int i = 0; i < pathNodes.size(); ++i)
    {
        DeBruijnNode * node = pathNodes[i];

        QList<BlastHit *> hitsThisNode;
        QList< QSharedPointer<BlastHit> > queryHits = query->getHits();
        for (int j = 0; j < queryHits.size(); ++j)
        {
            BlastHit * hit = queryHits[j].data();
            if (hit->m_node->getName() == node->getName())
                hitsThisNode.push_back(hit);
        }

        std::sort(hitsThisNode.begin(), hitsThisNode.end(),
                  BlastHit::compareTwoBlastHitPointers);

        for (int j = 0; j < hitsThisNode.size(); ++j)
        {
            BlastHit * hit = hitsThisNode[j];

            //First check to make sure the hits are within the path.  This means
            //if we are in the first or last nodes of the path, we need to make
            //sure that our hit is contained within the start/end positions.
            if ( (i != 0 || hit->m_nodeStart >= m_path.getStartLocation().getPosition()) &&
                    (i != pathNodes.size()-1 || hit->m_nodeEnd <= m_path.getEndLocation().getPosition()))
            {
                //Now make sure that the hit follows the previous hit in the
                //query.
                if (previousHit == 0 ||
                        hit->m_queryStart > previousHit->m_queryStart)
                {
                    m_hits.push_back(hit);
                    previousHit = hit;
                }
            }
        }
    }
}
示例#6
0
void GraphicsItemEdge::calculateAndSetPath()
{
    setControlPointLocations();

    double edgeDistance = QLineF(m_startingLocation, m_endingLocation).length();

    double extensionLength = g_settings->segmentLength;
    if (extensionLength > edgeDistance / 2.0)
        extensionLength = edgeDistance / 2.0;

    m_controlPoint1 = extendLine(m_beforeStartingLocation, m_startingLocation, extensionLength);
    m_controlPoint2 = extendLine(m_afterEndingLocation, m_endingLocation, extensionLength);


    //If this edge is connecting a node to itself, and that node
    //is made of only one line segment, then a special path is
    //required, otherwise the edge will be mostly hidden underneath
    //the node.
    DeBruijnNode * startingNode = m_deBruijnEdge->getStartingNode();
    DeBruijnNode * endingNode = m_deBruijnEdge->getEndingNode();
    if (startingNode == endingNode)
    {
        GraphicsItemNode * graphicsItemNode = startingNode->getGraphicsItemNode();
        if (graphicsItemNode == 0)
            graphicsItemNode = startingNode->getReverseComplement()->getGraphicsItemNode();
        if (graphicsItemNode != 0 && graphicsItemNode->m_linePoints.size() == 2)
        {
            makeSpecialPathConnectingNodeToSelf();
            return;
        }
    }

    //If we are in single mode and the edge connects a node to its reverse
    //complement, then we need a special path to make it visible.
    if (startingNode == endingNode->getReverseComplement() &&
            !g_settings->doubleMode)
    {
        makeSpecialPathConnectingNodeToReverseComplement();
        return;
    }

    //Otherwise, the path is just a single cubic Bezier curve.
    QPainterPath path;
    path.moveTo(m_startingLocation);
    path.cubicTo(m_controlPoint1, m_controlPoint2, m_endingLocation);

    setPath(path);
}
void AssemblyGraph::addGraphicsItemsToScene(MyGraphicsScene * scene)
{
    scene->clear();

    double meanDrawnReadDepth = getMeanDeBruijnGraphReadDepth(true);

    //First make the GraphicsItemNode objects
    QMapIterator<QString, DeBruijnNode*> i(m_deBruijnGraphNodes);
    while (i.hasNext())
    {
        i.next();
        DeBruijnNode * node = i.value();

        if (node->isDrawn())
        {
            if (meanDrawnReadDepth == 0)
                node->setReadDepthRelativeToMeanDrawnReadDepth(1.0);
            else
                node->setReadDepthRelativeToMeanDrawnReadDepth(node->getReadDepth() / meanDrawnReadDepth);
            GraphicsItemNode * graphicsItemNode = new GraphicsItemNode(node, m_graphAttributes);
            node->setGraphicsItemNode(graphicsItemNode);
            graphicsItemNode->setFlag(QGraphicsItem::ItemIsSelectable);
            graphicsItemNode->setFlag(QGraphicsItem::ItemIsMovable);
        }
    }

    resetAllNodeColours();

    //Then make the GraphicsItemEdge objects and add them to the scene first
    //so they are drawn underneath
    for (size_t i = 0; i < m_deBruijnGraphEdges.size(); ++i)
    {
        if (m_deBruijnGraphEdges[i]->isDrawn())
        {
            GraphicsItemEdge * graphicsItemEdge = new GraphicsItemEdge(m_deBruijnGraphEdges[i]);
            m_deBruijnGraphEdges[i]->setGraphicsItemEdge(graphicsItemEdge);
            graphicsItemEdge->setFlag(QGraphicsItem::ItemIsSelectable);
            scene->addItem(graphicsItemEdge);
        }
    }

    //Now add the GraphicsItemNode objects to the scene so they are drawn
    //on top
    QMapIterator<QString, DeBruijnNode*> j(m_deBruijnGraphNodes);
    while (j.hasNext())
    {
        j.next();
        DeBruijnNode * node = j.value();
        if (node->hasGraphicsItem())
            scene->addItem(node->getGraphicsItemNode());
    }
}
示例#8
0
//This function differs from the above by including all reverse complement
//nodes in the path search.
std::vector<DeBruijnNode *> DeBruijnNode::getNodesCommonToAllPaths(std::vector< std::vector <DeBruijnNode *> > * paths,
                                                                   bool includeReverseComplements) const
{
    std::vector<DeBruijnNode *> commonNodes;

    //If there are no paths, then return the empty vector.
    if (paths->size() == 0)
        return commonNodes;

    //If there is only one path in path, then they are all common nodes
    commonNodes = (*paths)[0];
    if (paths->size() == 1)
        return commonNodes;

    //If there are two or more paths, it's necessary to find the intersection.
    for (size_t i = 1; i < paths->size(); ++i)
    {
        QApplication::processEvents();
        std::vector <DeBruijnNode *> * path = &((*paths)[i]);

        //If we are including reverse complements in the search,
        //then it is necessary to build a new vector that includes
        //reverse complement nodes and then use that vector.
        std::vector <DeBruijnNode *> pathWithReverseComplements;
        if (includeReverseComplements)
        {
            for (size_t j = 0; j < path->size(); ++j)
            {
                DeBruijnNode * node = (*path)[j];
                pathWithReverseComplements.push_back(node);
                pathWithReverseComplements.push_back(node->getReverseComplement());
            }
            path = &pathWithReverseComplements;
        }

        //Combine the commonNodes vector with the path vector,
        //excluding any repeats.
        std::sort(commonNodes.begin(), commonNodes.end());
        std::sort(path->begin(), path->end());
        std::vector<DeBruijnNode *> newCommonNodes;
        std::set_intersection(commonNodes.begin(), commonNodes.end(), path->begin(), path->end(), std::back_inserter(newCommonNodes));
        commonNodes = newCommonNodes;
    }

    return commonNodes;
}
示例#9
0
QByteArray DeBruijnNode::getUpstreamSequence(int upstreamSequenceLength) const
{
    std::vector<DeBruijnNode*> upstreamNodes = getUpstreamNodes();

    QByteArray bestUpstreamNodeSequence;

    for (size_t i = 0; i < upstreamNodes.size(); ++i)
    {
        DeBruijnNode * upstreamNode = upstreamNodes[i];
        QByteArray upstreamNodeFullSequence = upstreamNode->getSequence();
        QByteArray upstreamNodeSequence;

        //If the upstream node has enough sequence, great!
        if (upstreamNodeFullSequence.length() >= upstreamSequenceLength)
            upstreamNodeSequence = upstreamNodeFullSequence.right(upstreamSequenceLength);

        //If the upstream node does not have enough sequence, then we need to
        //look even further upstream.
        else
            upstreamNodeSequence = upstreamNode->getUpstreamSequence(upstreamSequenceLength - upstreamNodeFullSequence.length()) + upstreamNodeFullSequence;

        //If we now have enough sequence, then we can return it.
        if (upstreamNodeSequence.length() == upstreamSequenceLength)
            return upstreamNodeSequence;

        //If we don't have enough sequence, then we need to try the next
        //upstream node.  If our current one is the best so far, save that in
        //case no complete sequence is found.
        if (upstreamNodeSequence.length() > bestUpstreamNodeSequence.length())
            bestUpstreamNodeSequence = upstreamNodeSequence;
    }

    //If the code got here, that means that not enough upstream sequence was
    //found in any path!  Return what we have managed to get so far.
    return bestUpstreamNodeSequence;
}
//The startingNodes and nodeDistance parameters are only used if the graph scope
//is not WHOLE_GRAPH.
void AssemblyGraph::buildOgdfGraphFromNodesAndEdges(std::vector<DeBruijnNode *> startingNodes, int nodeDistance)
{
    if (g_settings->graphScope == WHOLE_GRAPH)
    {
        QMapIterator<QString, DeBruijnNode*> i(m_deBruijnGraphNodes);
        while (i.hasNext())
        {
            i.next();

            //If double mode is off, only positive nodes are drawn.  If it's
            //on, all nodes are drawn.
            if (i.value()->isPositiveNode() || g_settings->doubleMode)
                i.value()->setAsDrawn();
        }
    }
    else //The scope is either around specified nodes or around nodes with BLAST hits
    {
        for (size_t i = 0; i < startingNodes.size(); ++i)
        {
            DeBruijnNode * node = startingNodes[i];

            //If we are in single mode, make sure that each node is positive.
            if (!g_settings->doubleMode && node->isNegativeNode())
                node = node->getReverseComplement();

            node->setAsDrawn();
            node->setAsSpecial();
            node->labelNeighbouringNodesAsDrawn(nodeDistance, 0);
        }
    }

    //First loop through each node, adding it to OGDF if it is drawn.
    QMapIterator<QString, DeBruijnNode*> i(m_deBruijnGraphNodes);
    while (i.hasNext())
    {
        i.next();
        if (i.value()->isDrawn())
            i.value()->addToOgdfGraph(m_ogdfGraph);
    }

    //Then loop through each determining its drawn status and adding it
    //to OGDF if it is drawn.
    for (size_t i = 0; i < m_deBruijnGraphEdges.size(); ++i)
    {
        m_deBruijnGraphEdges[i]->determineIfDrawn();
        if (m_deBruijnGraphEdges[i]->isDrawn())
            m_deBruijnGraphEdges[i]->addToOgdfGraph(m_ogdfGraph);
    }
}
void AssemblyGraph::pointEachNodeToItsReverseComplement()
{
    QMapIterator<QString, DeBruijnNode*> i(m_deBruijnGraphNodes);
    while (i.hasNext())
    {
        i.next();
        DeBruijnNode * positiveNode = i.value();

        if (positiveNode->isPositiveNode())
        {
            DeBruijnNode * negativeNode = m_deBruijnGraphNodes[getOppositeNodeName(positiveNode->getName())];
            if (negativeNode != 0)
            {
                positiveNode->setReverseComplement(negativeNode);
                negativeNode->setReverseComplement(positiveNode);
            }
        }
    }
}
void AssemblyGraph::buildDeBruijnGraphFromFastg(QString fullFileName)
{
    m_graphFileType = FASTG;

    QFile inputFile(fullFileName);
    if (inputFile.open(QIODevice::ReadOnly))
    {
        std::vector<QString> edgeStartingNodeNames;
        std::vector<QString> edgeEndingNodeNames;
        DeBruijnNode * node = 0;

        QTextStream in(&inputFile);
        while (!in.atEnd())
        {
            QApplication::processEvents();

            QString nodeName;
            double nodeReadDepth;

            QString line = in.readLine();

            //If the line starts with a '>', then we are beginning a new node.
            if (line.startsWith(">"))
            {
                line.remove(0, 1); //Remove '>' from start
                line.chop(1); //Remove ';' from end
                QStringList nodeDetails = line.split(":");

                QString thisNode = nodeDetails.at(0);

                //A single quote as the last character indicates a negative node.
                bool negativeNode = thisNode.at(thisNode.size() - 1) == '\'';

                QStringList thisNodeDetails = thisNode.split("_");

                if (thisNodeDetails.size() < 6)
                    throw "load error";

                nodeName = thisNodeDetails.at(1);
                if (negativeNode)
                    nodeName += "-";
                else
                    nodeName += "+";

                QString nodeReadDepthString = thisNodeDetails.at(5);
                if (negativeNode)
                {
                    //It may be necessary to remove a single quote from the end of the read depth
                    if (nodeReadDepthString.at(nodeReadDepthString.size() - 1) == '\'')
                        nodeReadDepthString.chop(1);
                }
                nodeReadDepth = nodeReadDepthString.toDouble();

                //Make the node
                node = new DeBruijnNode(nodeName, nodeReadDepth, ""); //Sequence string is currently empty - will be added to on subsequent lines of the fastg file
                m_deBruijnGraphNodes.insert(nodeName, node);

                //The second part of nodeDetails is a comma-delimited list of edge nodes.
                //Edges aren't made right now (because the other node might not yet exist),
                //so they are saved into vectors and made after all the nodes have been made.
                if (nodeDetails.size() == 1)
                    continue;
                QStringList edgeNodes = nodeDetails.at(1).split(",");
                for (int i = 0; i < edgeNodes.size(); ++i)
                {
                    QString edgeNode = edgeNodes.at(i);

                    QChar lastChar = edgeNode.at(edgeNode.size() - 1);
                    bool negativeNode = false;
                    if (lastChar == '\'')
                    {
                        negativeNode = true;
                        edgeNode.chop(1);
                    }
                    QStringList edgeNodeDetails = edgeNode.split("_");

                    if (edgeNodeDetails.size() < 2)
                        throw "load error";

                    QString edgeNodeName = edgeNodeDetails.at(1);
                    if (negativeNode)
                        edgeNodeName += "-";
                    else
                        edgeNodeName += "+";

                    edgeStartingNodeNames.push_back(nodeName);
                    edgeEndingNodeNames.push_back(edgeNodeName);
                }
            }

            //If the line does not start with a '>', then this line is part of the
            //sequence for the last node.
            else
            {
                QByteArray sequenceLine = line.simplified().toLocal8Bit();
                if (node != 0)
                    node->appendToSequence(sequenceLine);
            }
        }

        inputFile.close();

        //If all went well, each node will have a reverse complement and the code
        //will never get here.  However, I have noticed that some SPAdes fastg files
        //have, for some reason, negative nodes with no positive counterpart.  For
        //that reason, we will now make any reverse complement nodes for nodes that
        //lack them.
        QMapIterator<QString, DeBruijnNode*> i(m_deBruijnGraphNodes);
        while (i.hasNext())
        {
            i.next();
            DeBruijnNode * node = i.value();
            makeReverseComplementNodeIfNecessary(node);
        }
        pointEachNodeToItsReverseComplement();


        //Create all of the edges
        for (size_t i = 0; i < edgeStartingNodeNames.size(); ++i)
        {
            QString node1Name = edgeStartingNodeNames[i];
            QString node2Name = edgeEndingNodeNames[i];
            createDeBruijnEdge(node1Name, node2Name);
        }
    }

    autoDetermineAllEdgesExactOverlap();

    if (m_deBruijnGraphNodes.size() == 0)
        throw "load error";
}
示例#13
0
void GraphicsItemEdge::setControlPointLocations()
{
    DeBruijnNode * startingNode = m_deBruijnEdge->getStartingNode();
    DeBruijnNode * endingNode = m_deBruijnEdge->getEndingNode();

    if (startingNode->hasGraphicsItem())
    {
        m_startingLocation = startingNode->getGraphicsItemNode()->getLast();
        m_beforeStartingLocation = startingNode->getGraphicsItemNode()->getSecondLast();
    }
    else if (startingNode->getReverseComplement()->hasGraphicsItem())
    {
        m_startingLocation = startingNode->getReverseComplement()->getGraphicsItemNode()->getFirst();
        m_beforeStartingLocation = startingNode->getReverseComplement()->getGraphicsItemNode()->getSecond();
    }

    if (endingNode->hasGraphicsItem())
    {
        m_endingLocation = endingNode->getGraphicsItemNode()->getFirst();
        m_afterEndingLocation = endingNode->getGraphicsItemNode()->getSecond();
    }
    else if (endingNode->getReverseComplement()->hasGraphicsItem())
    {
        m_endingLocation = endingNode->getReverseComplement()->getGraphicsItemNode()->getLast();
        m_afterEndingLocation = endingNode->getReverseComplement()->getGraphicsItemNode()->getSecondLast();
    }
}
示例#14
0
void AssemblyJobSsDeBruijn::findBestLinks(DeBruijnGraph* graph){
  char nucs[] = {'A','T','C','G'};
  for (DeBruijnGraph::Iterator nodeIt = graph->begin(); nodeIt != graph->end(); ++nodeIt){
    DeBruijnNode* currentNode = *nodeIt;
    DeBruijnNode* best5pLink = 0;
    float best5pLinkScore = 0;
    DeBruijnNode* best3pLink = 0;
    float best3pLinkScore = 0;
    for (int n = 0; n < 4; ++n){
      DeBruijnNode* cand5pLink = currentNode->get5pLink(nucs[n]);
      if (cand5pLink != 0){
	// take the worse of the two linkage scores
	float next5pLinkScore;
	if (currentNode->get5pLinkScore(nucs[n]) > cand5pLink->get3pLinkScore(currentNode->get3pNuc()) ){
	  next5pLinkScore = cand5pLink->get3pLinkScore(currentNode->get3pNuc());
	} else { next5pLinkScore = currentNode->get5pLinkScore(nucs[n]); }
	if (next5pLinkScore > best5pLinkScore){
	  best5pLinkScore = next5pLinkScore;
	  best5pLink = cand5pLink;
	}
      }
      DeBruijnNode* cand3pLink = currentNode->get3pLink(nucs[n]);
      if (cand3pLink != 0){
	// take the worse of the two linkage scores
	float next3pLinkScore;
	if (currentNode->get3pLinkScore(nucs[n]) > cand3pLink->get5pLinkScore(currentNode->get5pNuc()) ){
	  next3pLinkScore = cand3pLink->get5pLinkScore(currentNode->get5pNuc());
	} else { next3pLinkScore = currentNode->get3pLinkScore(nucs[n]); }
	if (next3pLinkScore > best3pLinkScore){
	  best3pLinkScore = next3pLinkScore;
	  best3pLink = cand3pLink;
	}
      }
    }
    currentNode->setBest5pLink(best5pLink);
    currentNode->setBest3pLink(best3pLink);
  }

  // FOURTH resolve best-edge conflicts
  for (DeBruijnGraph::Iterator nodeIt = graph->begin(); nodeIt != graph->end(); ++nodeIt){
    DeBruijnNode* currentNode = *nodeIt;
    DeBruijnNode* best5pLink = currentNode->best5pLink();
    if (best5pLink != 0 and best5pLink->best3pLink() != currentNode){
      DeBruijnNode* bestRecip = best5pLink->best3pLink();
      if (bestRecip == 0){ best5pLink->setBest3pLink(currentNode); }
      else { currentNode->setBest5pLink( 0 ); }
    }
    DeBruijnNode* best3pLink = currentNode->best3pLink();
    if (best3pLink != 0 and best3pLink->best5pLink() != currentNode){
      DeBruijnNode* bestRecip = best3pLink->best5pLink();
      if (bestRecip == 0){ best3pLink->setBest5pLink(currentNode); }
      else { currentNode->setBest3pLink( 0 ); }
    }
  }
}
示例#15
0
//This function determines the contiguity of nodes relative to this one.
//It has two steps:
// -First, for each edge leaving this node, all paths outward are found.
//  Any nodes in any path are MAYBE_CONTIGUOUS, and nodes in all of the
//  paths are CONTIGUOUS.
// -Second, it is necessary to check in the opposite direction - for each
//  of the MAYBE_CONTIGUOUS nodes, do they have a path that unambiguously
//  leads to this node?  If so, then they are CONTIGUOUS.
void DeBruijnNode::determineContiguity()
{
    upgradeContiguityStatus(STARTING);

    //A set is used to store all nodes found in the paths, as the nodes
    //that show up as MAYBE_CONTIGUOUS will have their paths checked
    //to this node.
    std::set<DeBruijnNode *> allCheckedNodes;

    //For each path leaving this node, find all possible paths
    //outward.  Nodes in any of the paths for an edge are
    //MAYBE_CONTIGUOUS.  Nodes in all of the paths for an edge
    //are CONTIGUOUS.
    for (size_t i = 0; i < m_edges.size(); ++i)
    {
        DeBruijnEdge * edge = m_edges[i];
        bool outgoingEdge = (this == edge->getStartingNode());

        std::vector< std::vector <DeBruijnNode *> > allPaths;
        edge->tracePaths(outgoingEdge, g_settings->contiguitySearchSteps, &allPaths, this);

        //Set all nodes in the paths as MAYBE_CONTIGUOUS
        for (size_t j = 0; j < allPaths.size(); ++j)
        {
            QApplication::processEvents();
            for (size_t k = 0; k < allPaths[j].size(); ++k)
            {
                DeBruijnNode * node = allPaths[j][k];
                node->upgradeContiguityStatus(MAYBE_CONTIGUOUS);
                allCheckedNodes.insert(node);
            }
        }

        //Set all common nodes as CONTIGUOUS_STRAND_SPECIFIC
        std::vector<DeBruijnNode *> commonNodesStrandSpecific = getNodesCommonToAllPaths(&allPaths, false);
        for (size_t j = 0; j < commonNodesStrandSpecific.size(); ++j)
            (commonNodesStrandSpecific[j])->upgradeContiguityStatus(CONTIGUOUS_STRAND_SPECIFIC);

        //Set all common nodes (when including reverse complement nodes)
        //as CONTIGUOUS_EITHER_STRAND
        std::vector<DeBruijnNode *> commonNodesEitherStrand = getNodesCommonToAllPaths(&allPaths, true);
        for (size_t j = 0; j < commonNodesEitherStrand.size(); ++j)
        {
            DeBruijnNode * node = commonNodesEitherStrand[j];
            node->upgradeContiguityStatus(CONTIGUOUS_EITHER_STRAND);
            node->getReverseComplement()->upgradeContiguityStatus(CONTIGUOUS_EITHER_STRAND);
        }
    }

    //For each node that was checked, then we check to see if any
    //of its paths leads unambiuously back to the starting node (this node).
    for (std::set<DeBruijnNode *>::iterator i = allCheckedNodes.begin(); i != allCheckedNodes.end(); ++i)
    {
        QApplication::processEvents();
        DeBruijnNode * node = *i;
        ContiguityStatus status = node->getContiguityStatus();

        //First check without reverse complement target for
        //strand-specific contiguity.
        if (status != CONTIGUOUS_STRAND_SPECIFIC &&
                node->doesPathLeadOnlyToNode(this, false))
            node->upgradeContiguityStatus(CONTIGUOUS_STRAND_SPECIFIC);

        //Now check including the reverse complement target for
        //either strand contiguity.
        if (status != CONTIGUOUS_STRAND_SPECIFIC &&
                status != CONTIGUOUS_EITHER_STRAND &&
                node->doesPathLeadOnlyToNode(this, true))
        {
            node->upgradeContiguityStatus(CONTIGUOUS_EITHER_STRAND);
            node->getReverseComplement()->upgradeContiguityStatus(CONTIGUOUS_EITHER_STRAND);
        }
    }
}
示例#16
0
ScoredSeq* AssemblyJobSsDeBruijn::makeSeqFromPath(DeBruijnNode* node5p, DeBruijnNode* node3p, long numNodes){
  long seqLen = _kmerSize + numNodes - 1;
  char* cSeq = new char[seqLen + 1];
  cSeq[seqLen] = '\0';
  float* scores = new float[seqLen];
  float* links = new float[seqLen-1];

  // do the initial kmer and any overlapping positions
  node5p->getKmer(cSeq,0);
  float firstScore = node5p->getKmerScore();
  for (int n = 0; n < _kmerSize; ++n){
    scores[n] = firstScore;
    if (n < _kmerSize - 1){ links[n] = firstScore; }
  }

  // fill in the rest
  DeBruijnNode* currentNode = node5p;
  DeBruijnNode* priorNode = 0;
  long pos5p = 0;
  long pos3p = _kmerSize - 1;
  while (currentNode != node3p){
    // increment
    pos5p++;
    pos3p++;
    priorNode = currentNode;
    currentNode = currentNode->best3pLink();
    // fill in values
    cSeq[pos3p] = currentNode->get3pNuc();
    scores[pos3p] = currentNode->getKmerScore();
    links[pos3p-1] = priorNode->get3pLinkScore( currentNode->get3pNuc() );
    if (links[pos5p-1] < currentNode->get5pLinkScore( cSeq[pos5p-1] )){
      links[pos5p-1] = currentNode->get5pLinkScore( cSeq[pos5p-1] );
    }
    for (long pos = pos5p; pos < pos3p; ++pos){
      if (scores[pos] < currentNode->getKmerScore()){ scores[pos] = currentNode->getKmerScore(); }
    }
  }

  ScoredSeq* novelSeq = new ScoredSeqShallow(true, cSeq, scores, links, seqLen);
  //ScoredSeq* novelSeq = ScoredSeq::getScoredSeq(cSeq, scores, links, seqLen);
  /*
  delete [] cSeq;
  delete [] scores;
  delete [] links;
  */
  return novelSeq;
}
void AssemblyGraph::buildDeBruijnGraphFromGfa(QString fullFileName)
{
    m_graphFileType = GFA;

    QFile inputFile(fullFileName);
    if (inputFile.open(QIODevice::ReadOnly))
    {
        std::vector<QString> edgeStartingNodeNames;
        std::vector<QString> edgeEndingNodeNames;
        std::vector<int> edgeOverlaps;

        QTextStream in(&inputFile);
        while (!in.atEnd())
        {
            QApplication::processEvents();
            QString line = in.readLine();

            QStringList lineParts = line.split(QRegExp("\t"));

            if (lineParts.size() < 1)
                continue;

            //Lines beginning with "S" are sequence (node) lines
            if (lineParts.at(0) == "S")
            {
                if (lineParts.size() < 3)
                    throw "load error";

                QString nodeName = lineParts.at(1);
                QString posNodeName = nodeName + "+";
                QString negNodeName = nodeName + "-";

                QByteArray sequence = lineParts.at(2).toLocal8Bit();
                QByteArray revCompSequence = getReverseComplement(sequence);

                //If there is an attribute holding the read depth, we'll use that.
                //If there isn't, then we'll use zero.
                double nodeReadDepth = 0.0;

                for (int i = 3; i < lineParts.size(); ++i)
                {
                    QString part = lineParts.at(i);
                    if (part.size() < 6)
                        continue;
                    else if (part.left(5) == "KC:f:")
                        nodeReadDepth = part.right(part.length() - 5).toDouble();
                }

                DeBruijnNode * node = new DeBruijnNode(posNodeName, nodeReadDepth, sequence);
                DeBruijnNode * reverseComplementNode = new DeBruijnNode(negNodeName, nodeReadDepth, revCompSequence);
                node->setReverseComplement(reverseComplementNode);
                reverseComplementNode->setReverseComplement(node);
                m_deBruijnGraphNodes.insert(posNodeName, node);
                m_deBruijnGraphNodes.insert(negNodeName, reverseComplementNode);
            }

            //Lines beginning with "L" are link (edge) lines
            else if (lineParts.at(0) == "L")
            {
                //Edges aren't made now, in case their sequence hasn't yet been specified.
                //Instead, we save the starting and ending nodes and make the edges after
                //we're done looking at the file.

                if (lineParts.size() < 6)
                    throw "load error";

                //Parts 1 and 3 hold the node names and parts 2 and 4 hold the corresponding +/-.
                QString startingNode = lineParts.at(1) + lineParts.at(2);
                QString endingNode = lineParts.at(3) + lineParts.at(4);
                edgeStartingNodeNames.push_back(startingNode);
                edgeEndingNodeNames.push_back(endingNode);

                //Part 5 holds the node overlap cigar string
                QString cigar = lineParts.at(5);
                edgeOverlaps.push_back(getLengthFromCigar(cigar));
            }
        }

        //Create all of the edges
        for (size_t i = 0; i < edgeStartingNodeNames.size(); ++i)
        {
            QString node1Name = edgeStartingNodeNames[i];
            QString node2Name = edgeEndingNodeNames[i];
            int overlap = edgeOverlaps[i];
            createDeBruijnEdge(node1Name, node2Name, overlap);
        }
    }

    if (m_deBruijnGraphNodes.size() == 0)
        throw "load error";
}
void AssemblyGraph::buildDeBruijnGraphFromLastGraph(QString fullFileName)
{
    m_graphFileType = LAST_GRAPH;

    bool firstLine = true;
    QFile inputFile(fullFileName);
    if (inputFile.open(QIODevice::ReadOnly))
    {
        QTextStream in(&inputFile);
        while (!in.atEnd())
        {
            QApplication::processEvents();
            QString line = in.readLine();

            if (firstLine)
            {
                QStringList firstLineParts = line.split(QRegExp("\\s+"));
                if (firstLineParts.size() > 2)
                m_kmer = firstLineParts[2].toInt();
                firstLine = false;
            }

            if (line.startsWith("NODE"))
            {
                QStringList nodeDetails = line.split(QRegExp("\\s+"));

                if (nodeDetails.size() < 4)
                    throw "load error";

                QString nodeName = nodeDetails.at(1);
                QString posNodeName = nodeName + "+";
                QString negNodeName = nodeName + "-";

                int nodeLength = nodeDetails.at(2).toInt();

                double nodeReadDepth;
                if (nodeLength > 0)
                    nodeReadDepth = double(nodeDetails.at(3).toInt()) / nodeLength; //IS THIS COLUMN ($COV_SHORT1) THE BEST ONE TO USE?
                else
                    nodeReadDepth = double(nodeDetails.at(3).toInt());

                QByteArray sequence = in.readLine().toLocal8Bit();
                QByteArray revCompSequence = in.readLine().toLocal8Bit();

                DeBruijnNode * node = new DeBruijnNode(posNodeName, nodeReadDepth, sequence);
                DeBruijnNode * reverseComplementNode = new DeBruijnNode(negNodeName, nodeReadDepth, revCompSequence);
                node->setReverseComplement(reverseComplementNode);
                reverseComplementNode->setReverseComplement(node);
                m_deBruijnGraphNodes.insert(posNodeName, node);
                m_deBruijnGraphNodes.insert(negNodeName, reverseComplementNode);
            }
            else if (line.startsWith("ARC"))
            {
                QStringList arcDetails = line.split(QRegExp("\\s+"));

                if (arcDetails.size() < 3)
                    throw "load error";

                QString node1Name = convertNormalNumberStringToBandageNodeName(arcDetails.at(1));
                QString node2Name = convertNormalNumberStringToBandageNodeName(arcDetails.at(2));

                createDeBruijnEdge(node1Name, node2Name);
            }
        }
        inputFile.close();

        setAllEdgesExactOverlap(0);
    }

    if (m_deBruijnGraphNodes.size() == 0)
        throw "load error";
}
示例#19
0
pair<AssemblyJobSsDeBruijn::NucNode*,long> AssemblyJobSsDeBruijn::makeGraphTree(set<ScoredSeq*>* seqs, int kmerSize){
  NucNode* topNode = new NucNode(0, '\0');
  long numBaseNodes = 0;
  if (kmerSize > 0){
    for (set<ScoredSeq*>::iterator seqIt = seqs->begin(); seqIt != seqs->end(); ++seqIt){
      ScoredSeq* seq = *seqIt;

      // these are the nodes for which the current nuc is at the Nth position in the kmer;
      // the elements earlier in the array represent kmers that come later in the sequence
      SeqNode* currentNodes[kmerSize-1];
      float worstScores[kmerSize-1];
      // for making the de Bruijn graph at the tips
      DeBruijnNode* finishedNode = 0;
      DeBruijnNode* priorNode = 0;

      // used to determine how far along the kmer construction has made it
      long maxCurrentNodeIndex = 0;
      bool createFinishedNode = false;

      char* nucs = seq->getSeq('+');
      float* scores = seq->getScores('+');
      float* links = seq->getLinks('+');
      long seqSize = seq->size();
      long seqSizeM1 = seqSize - 1;

      for (long pos = 0; pos < seqSize; ++pos){
	char nuc = nucs[pos];
	float nucScore = scores[pos];
	// will only be used up to the second-to-last position
	float linkScore;
	if (pos < seqSizeM1){ linkScore = links[pos]; }
	else { linkScore = 0; }

	// re-set the derivation of kmers if the nucleotide is an N
	if (nuc == 'N'){
	  maxCurrentNodeIndex = 0;
	  createFinishedNode = false;
	  finishedNode = 0;
	  priorNode = 0;
	} else {
	  priorNode = finishedNode;
	  if (createFinishedNode){
	    SeqNode* oldNode = currentNodes[maxCurrentNodeIndex];
	    float worstScore = worstScores[maxCurrentNodeIndex];
	    finishedNode = dynamic_cast<DeBruijnNode*>( oldNode->getBranch(nuc) );
	    if (finishedNode == 0){
	      finishedNode = new DeBruijnNode(oldNode, nuc);
	      oldNode->addBranch(finishedNode);
	      numBaseNodes++;
	    }
	    // add the kmer score to the node; the worst score possible!
	    if ( nucScore < worstScore ){ worstScore = nucScore; }
	    finishedNode->addKmerScore(worstScore);

	    // add the 3p-directed link score
	    if (priorNode != 0){
	      float link3pScore = links[pos-1];
	      float link5pScore = links[pos-kmerSize];
	      float worseLinkScore = link3pScore;
	      if (link5pScore < worseLinkScore){ worseLinkScore = link5pScore; }
	      priorNode->add3pLink(finishedNode,worseLinkScore);
	      finishedNode->add5pLink(priorNode,worseLinkScore);
	    }
	  }
	  for (int n = maxCurrentNodeIndex; n >= 0; --n){
	    SeqNode* oldNode;
	    float worstScore;
	    if (n==0){
	      oldNode = topNode;
	      worstScore = nucScore;
	    }
	    else {
	      oldNode = currentNodes[n-1];
	      worstScore = worstScores[n-1];
	      if ( nucScore < worstScore ){ worstScore = nucScore; }
	    }
	    if ( linkScore < worstScore ){ worstScore = linkScore; }
	    SeqNode* newNode = oldNode->getBranch(nuc);
	    if (newNode == 0){
	      newNode = new NucNode(oldNode,nuc);
	      oldNode->addBranch( newNode );
	    }
	    currentNodes[n] = newNode;
	    worstScores[n] = worstScore;
	  }
	  if (! createFinishedNode){
	    if (maxCurrentNodeIndex < kmerSize-2){ maxCurrentNodeIndex++; }
	    else { createFinishedNode = true; }
	  }
	}
      }
      delete [] nucs;
      delete [] scores;
      delete [] links;
    }
  }
  return pair<NucNode*,long>(topNode,numBaseNodes);
}