void AssemblyGraph::determineGraphInfo()
{
    m_shortestContig = std::numeric_limits<long long>::max();
    m_longestContig = 0;
    int nodeCount = 0;
    long long totalLength = 0;
    std::vector<double> nodeReadDepths;

    QMapIterator<QString, DeBruijnNode*> i(m_deBruijnGraphNodes);
    while (i.hasNext())
    {
        i.next();
        long long nodeLength = i.value()->getLength();

        if (nodeLength < m_shortestContig)
            m_shortestContig = nodeLength;
        if (nodeLength > m_longestContig)
            m_longestContig = nodeLength;

        //Only add up the length for positive nodes
        if (i.value()->isPositiveNode())
        {
            totalLength += nodeLength;
            ++nodeCount;
        }

        nodeReadDepths.push_back(i.value()->getReadDepth());
    }

    //Count up the edges.  Edges that are their own pairs will
    //not be counted, as these won't show up in single mode.
    int edgeCount = 0;
    for (size_t i = 0; i < m_deBruijnGraphEdges.size(); ++i)
    {
        DeBruijnEdge * edge = m_deBruijnGraphEdges[i];
        if (edge != edge->getReverseComplement())
            ++edgeCount;
    }
    edgeCount /= 2;

    m_nodeCount = nodeCount;
    m_edgeCount = edgeCount;
    m_totalLength = totalLength;
    m_meanReadDepth = getMeanDeBruijnGraphReadDepth();

    std::sort(nodeReadDepths.begin(), nodeReadDepths.end());

    double firstQuartileIndex = nodeReadDepths.size() / 4.0;
    double medianIndex = nodeReadDepths.size() / 2.0;
    double thirdQuartileIndex = nodeReadDepths.size() * 3.0 / 4.0;

    m_firstQuartileReadDepth = getValueUsingFractionalIndex(&nodeReadDepths, firstQuartileIndex);
    m_medianReadDepth = getValueUsingFractionalIndex(&nodeReadDepths, medianIndex);
    m_thirdQuartileReadDepth = getValueUsingFractionalIndex(&nodeReadDepths, thirdQuartileIndex);

    //Set the auto base pairs per segment
    int totalSegments = m_nodeCount * g_settings->meanSegmentsPerNode;
    g_settings->autoBasePairsPerSegment = m_totalLength / totalSegments;
}
Ejemplo n.º 2
0
//Returns true if at least one of this node's edges has the given node as
//either its starting node or its ending node.  Returns false otherwise.
bool DeBruijnNode::isNodeConnected(DeBruijnNode * node) const
{
    std::vector<DeBruijnEdge *>::const_iterator edgeIt = m_edges.begin();
    while (edgeIt != m_edges.end())
    {
        DeBruijnEdge * candidate = *edgeIt;
        if (candidate->getStartingNode() == node)
            return true;
        if (candidate->getEndingNode() == node)
            return true;
        ++edgeIt;
    }
    return false;
}
Ejemplo n.º 3
0
//Looks for an edge that starts at this node and ends at the passed node.
//The first such edge found (in the order the edges are stored) is
//returned.  If there is no such edge, a null pointer is returned.
DeBruijnEdge * DeBruijnNode::doesNodeLeadAway(DeBruijnNode * node) const
{
    for (std::vector<DeBruijnEdge *>::const_iterator edgeIt = m_edges.begin(); edgeIt != m_edges.end(); ++edgeIt)
    {
        DeBruijnEdge * candidate = *edgeIt;

        //Skip edges that do not start here.
        if (candidate->getStartingNode() != this)
            continue;

        if (candidate->getEndingNode() == node)
            return candidate;
    }
    return 0;
}
Ejemplo n.º 4
0
std::vector<DeBruijnEdge *> DeBruijnNode::getLeavingEdges() const
{
    std::vector<DeBruijnEdge *> returnVector;
    for (size_t i = 0; i < m_edges.size(); ++i)
    {
        DeBruijnEdge * edge = m_edges[i];
        if (this == edge->getStartingNode())
            returnVector.push_back(edge);
    }
    return returnVector;
}
Ejemplo n.º 5
0
//Searches this node's edges for one that both starts and ends at this
//node (i.e. a loop onto itself).  The first such edge is returned, or 0
//if the node has no self-looping edge.
DeBruijnEdge * DeBruijnNode::getSelfLoopingEdge() const
{
    size_t edgeIndex = 0;
    while (edgeIndex < m_edges.size())
    {
        DeBruijnEdge * candidate = m_edges[edgeIndex];
        ++edgeIndex;

        if (candidate->getStartingNode() != this)
            continue;
        if (candidate->getEndingNode() == this)
            return candidate;
    }

    return 0;
}
Ejemplo n.º 6
0
//Tests each of this node's edges in turn, asking the edge whether following
//it leads only to the given node.  The search depth passed to the edge is
//the contiguitySearchSteps setting, and each search starts with a path
//that contains just this node.
//The includeReverseComplement flag is handed on to the edge's search
//unchanged.
//Returns true as soon as one edge reports success, and false if none do.
bool DeBruijnNode::doesPathLeadOnlyToNode(DeBruijnNode * node, bool includeReverseComplement)
{
    for (size_t edgeIndex = 0; edgeIndex != m_edges.size(); ++edgeIndex)
    {
        DeBruijnEdge * currentEdge = m_edges[edgeIndex];

        //The edge is followed forward if it starts at this node.
        const bool leavesThisNode = (currentEdge->getStartingNode() == this);

        //A fresh path is used for every edge.
        std::vector<DeBruijnNode *> visitedNodes(1, this);

        const bool reachesOnlyTarget = currentEdge->leadsOnlyToNode(leavesThisNode,
                                                                    g_settings->contiguitySearchSteps,
                                                                    node,
                                                                    visitedNodes,
                                                                    includeReverseComplement);
        if (reachesOnlyTarget)
            return true;
    }

    return false;
}
//Creates an edge from node1 to node2 together with its partner edge, which
//runs from the opposite of node2 to the opposite of node1.  Both edges are
//given the passed overlap, linked to each other as reverse complements,
//stored in the graph's edge vector and added to all four nodes.
//Nothing happens if any of the four nodes is missing from the graph or if
//node1 already has an edge leading to node2.
void AssemblyGraph::createDeBruijnEdge(QString node1Name, QString node2Name, int overlap)
{
    const QString oppositeName1 = getOppositeNodeName(node1Name);
    const QString oppositeName2 = getOppositeNodeName(node2Name);

    //All four nodes must be in the graph.
    const bool allNodesPresent = m_deBruijnGraphNodes.contains(node1Name) &&
            m_deBruijnGraphNodes.contains(node2Name) &&
            m_deBruijnGraphNodes.contains(oppositeName1) &&
            m_deBruijnGraphNodes.contains(oppositeName2);
    if (!allNodesPresent)
        return;

    DeBruijnNode * startNode = m_deBruijnGraphNodes[node1Name];
    DeBruijnNode * endNode = m_deBruijnGraphNodes[node2Name];
    DeBruijnNode * oppositeStartNode = m_deBruijnGraphNodes[oppositeName1];
    DeBruijnNode * oppositeEndNode = m_deBruijnGraphNodes[oppositeName2];

    //Do not make a duplicate of an edge that is already there.
    const std::vector<DeBruijnEdge *> * existingEdges = startNode->getEdgesPointer();
    for (std::vector<DeBruijnEdge *>::const_iterator edgeIt = existingEdges->begin();
         edgeIt != existingEdges->end(); ++edgeIt)
    {
        DeBruijnEdge * existing = *edgeIt;
        if (existing->getStartingNode() != startNode)
            continue;
        if (existing->getEndingNode() == endNode)
            return;
    }

    //An edge is its own pair when its reverse complement connects the
    //very same two nodes in the same direction.  In that case only one
    //edge object is made and it serves as both directions.
    const bool selfPaired = (startNode == oppositeEndNode && endNode == oppositeStartNode);

    DeBruijnEdge * forward = new DeBruijnEdge(startNode, endNode);
    DeBruijnEdge * backward = selfPaired ? forward : new DeBruijnEdge(oppositeEndNode, oppositeStartNode);

    forward->setReverseComplement(backward);
    backward->setReverseComplement(forward);

    forward->setOverlap(overlap);
    backward->setOverlap(overlap);

    //A self-paired edge is only stored once in the graph's edge vector.
    m_deBruijnGraphEdges.push_back(forward);
    if (!selfPaired)
        m_deBruijnGraphEdges.push_back(backward);

    startNode->addEdge(forward);
    endNode->addEdge(forward);
    oppositeStartNode->addEdge(backward);
    oppositeEndNode->addEdge(backward);
}
Ejemplo n.º 8
0
//This function determines the contiguity of nodes relative to this one.
//This node itself is marked as STARTING.  The rest has two steps:
// -First, for each edge of this node, all paths outward are traced.
//  Any nodes in any path are MAYBE_CONTIGUOUS.  Nodes common to all of
//  the paths for an edge are CONTIGUOUS_STRAND_SPECIFIC, and nodes common
//  to all of the paths when reverse complements are included are
//  CONTIGUOUS_EITHER_STRAND (as are their reverse complement nodes).
// -Second, it is necessary to check in the opposite direction - for each
//  of the nodes found in the first step, do they have a path that
//  unambiguously leads to this node?  If so, then they are upgraded to
//  one of the CONTIGUOUS statuses as well.
//All status changes go through upgradeContiguityStatus (presumably this
//never lowers a node's status - confirm against its definition).
void DeBruijnNode::determineContiguity()
{
    upgradeContiguityStatus(STARTING);

    //A set is used to store all nodes found in the paths, as the nodes
    //that show up as MAYBE_CONTIGUOUS will have their paths checked
    //to this node.
    std::set<DeBruijnNode *> allCheckedNodes;

    //For each edge of this node, find all possible paths
    //outward.  Nodes in any of the paths for an edge are
    //MAYBE_CONTIGUOUS.  Nodes in all of the paths for an edge
    //are CONTIGUOUS.
    for (size_t i = 0; i < m_edges.size(); ++i)
    {
        DeBruijnEdge * edge = m_edges[i];

        //True if the edge starts at this node.  This is passed on so that
        //the path tracing knows which way to follow the edge.
        bool outgoingEdge = (this == edge->getStartingNode());

        //The path search is limited by the contiguitySearchSteps setting.
        std::vector< std::vector <DeBruijnNode *> > allPaths;
        edge->tracePaths(outgoingEdge, g_settings->contiguitySearchSteps, &allPaths, this);

        //Set all nodes in the paths as MAYBE_CONTIGUOUS
        for (size_t j = 0; j < allPaths.size(); ++j)
        {
            //Qt events are processed once per path (presumably to keep the
            //user interface responsive during a long search).
            QApplication::processEvents();
            for (size_t k = 0; k < allPaths[j].size(); ++k)
            {
                DeBruijnNode * node = allPaths[j][k];
                node->upgradeContiguityStatus(MAYBE_CONTIGUOUS);
                allCheckedNodes.insert(node);
            }
        }

        //Set all common nodes as CONTIGUOUS_STRAND_SPECIFIC
        std::vector<DeBruijnNode *> commonNodesStrandSpecific = getNodesCommonToAllPaths(&allPaths, false);
        for (size_t j = 0; j < commonNodesStrandSpecific.size(); ++j)
            (commonNodesStrandSpecific[j])->upgradeContiguityStatus(CONTIGUOUS_STRAND_SPECIFIC);

        //Set all common nodes (when including reverse complement nodes)
        //as CONTIGUOUS_EITHER_STRAND.  The reverse complement of each such
        //node gets the same status.
        std::vector<DeBruijnNode *> commonNodesEitherStrand = getNodesCommonToAllPaths(&allPaths, true);
        for (size_t j = 0; j < commonNodesEitherStrand.size(); ++j)
        {
            DeBruijnNode * node = commonNodesEitherStrand[j];
            node->upgradeContiguityStatus(CONTIGUOUS_EITHER_STRAND);
            node->getReverseComplement()->upgradeContiguityStatus(CONTIGUOUS_EITHER_STRAND);
        }
    }

    //For each node that was checked, then we check to see if any
    //of its paths leads unambiguously back to the starting node (this node).
    for (std::set<DeBruijnNode *>::iterator i = allCheckedNodes.begin(); i != allCheckedNodes.end(); ++i)
    {
        QApplication::processEvents();
        DeBruijnNode * node = *i;

        //The status is read once, before either check below, so an upgrade
        //made by the first check is not reflected in the second condition.
        ContiguityStatus status = node->getContiguityStatus();

        //First check without reverse complement target for
        //strand-specific contiguity.  The path search is skipped for
        //nodes that already have this status.
        if (status != CONTIGUOUS_STRAND_SPECIFIC &&
                node->doesPathLeadOnlyToNode(this, false))
            node->upgradeContiguityStatus(CONTIGUOUS_STRAND_SPECIFIC);

        //Now check including the reverse complement target for
        //either strand contiguity.  The path search is skipped for nodes
        //that already had either of the contiguous statuses.
        if (status != CONTIGUOUS_STRAND_SPECIFIC &&
                status != CONTIGUOUS_EITHER_STRAND &&
                node->doesPathLeadOnlyToNode(this, true))
        {
            node->upgradeContiguityStatus(CONTIGUOUS_EITHER_STRAND);
            node->getReverseComplement()->upgradeContiguityStatus(CONTIGUOUS_EITHER_STRAND);
        }
    }
}
//Determines the exact overlap of every edge in the graph.
//Each edge first works out its own overlap.  The overlap sizes are then
//ranked from most to least common, and every edge is retested against the
//ranked sizes: the first ranked size that either equals the edge's current
//overlap or passes the edge's exact overlap test is the one it ends up with.
//Does nothing if the graph has no edges.
void AssemblyGraph::autoDetermineAllEdgesExactOverlap()
{
    const int edgeCount = static_cast<int>(m_deBruijnGraphEdges.size());
    if (edgeCount == 0)
        return;

    //Determine the overlap for each edge.
    for (size_t i = 0; i < m_deBruijnGraphEdges.size(); ++i)
        m_deBruijnGraphEdges[i]->autoDetermineExactOverlap();

    //The expectation here is that most overlaps will be
    //the same or from a small subset of possible sizes.
    //Edges with an overlap that do not match the most common
    //overlap(s) are suspected of having their overlap
    //misidentified.  They are therefore rechecked using the
    //common ones.
    //The position in this vector is used as the overlap size and the
    //value as the number of edges with that overlap.
    std::vector<int> overlapCounts = makeOverlapCountVector();

    //Sort the overlaps in order of decreasing numbers of edges.
    //I.e. the first overlap size in the vector will be the most
    //common overlap, the second will be the second most common,
    //etc.  The loop runs until every edge has been accounted for.
    std::vector<int> sortedOverlaps;
    int overlapsSoFar = 0;
    while (overlapsSoFar < edgeCount)
    {
        int mostCommonOverlap = 0;
        int mostCommonOverlapCount = 0;

        //Find the overlap size with the most instances.
        for (size_t i = 0; i < overlapCounts.size(); ++i)
        {
            if (overlapCounts[i] > mostCommonOverlapCount)
            {
                mostCommonOverlap = static_cast<int>(i);
                mostCommonOverlapCount = overlapCounts[i];
            }
        }

        //If no counts remain (the vector is empty or its counts add up to
        //fewer than the number of edges) then there is nothing more to
        //rank.  Stopping here prevents an endless loop and an
        //out-of-range write to the counts vector.
        if (mostCommonOverlapCount == 0)
            break;

        //Add that overlap to the common collection and remove it from the counts.
        sortedOverlaps.push_back(mostCommonOverlap);
        overlapsSoFar += mostCommonOverlapCount;
        overlapCounts[mostCommonOverlap] = 0;
    }

    //For each edge, see if one of the more common overlaps also works.
    //If so, use that instead.  The search stops once it reaches the
    //edge's own overlap, as any later sizes are less common than it.
    for (size_t i = 0; i < m_deBruijnGraphEdges.size(); ++i)
    {
        DeBruijnEdge * edge = m_deBruijnGraphEdges[i];
        for (size_t j = 0; j < sortedOverlaps.size(); ++j)
        {
            if (edge->getOverlap() == sortedOverlaps[j])
                break;
            else if (edge->testExactOverlap(sortedOverlaps[j]))
            {
                edge->setOverlap(sortedOverlaps[j]);
                break;
            }
        }
    }
}