Ejemplo n.º 1
0
//Returns this node's sequence in its "full" form.  For every graph type other
//than Velvet LastGraph this is simply getSequence().  For LastGraph graphs,
//where node sequences are shifted relative to their reverse complement, the
//stored sequence is extended at its beginning by k-1 bases.  Those bases come
//from the reverse complement node when this node is at least k-1 long, and
//from upstream nodes otherwise.
QByteArray DeBruijnNode::getFullSequence() const
{
    if (g_assemblyGraph->m_graphFileType != LAST_GRAPH)
        return getSequence();

    //Number of bases that have to be put in front of the stored sequence.
    int prefixLength = g_assemblyGraph->m_kmer - 1;

    QByteArray prefix;
    if (getLength() >= prefixLength)
    {
        //The node is long enough: the prefix is the reverse complement of the
        //last k-1 bases of the reverse complement node's sequence.
        QByteArray revCompTail = getReverseComplement()->getSequence().right(prefixLength);
        prefix = AssemblyGraph::getReverseComplement(revCompTail);
    }
    else
    {
        //The node is too short, so the prefix is gathered from upstream nodes.
        //If they supply fewer than k-1 bases, the shortfall is made up with
        //'N' characters on the left.
        prefix = getUpstreamSequence(prefixLength);
        int shortfall = prefixLength - prefix.length();
        if (shortfall > 0)
            prefix.prepend(QByteArray(shortfall, 'N'));
    }

    return prefix + getSequence();
}
//Makes sure the graph's node collection holds a node under the name opposite
//to the given node's name.  When the lookup yields a null pointer, a new node
//is created with the opposite name, the given node's read depth and the
//reverse complement of its sequence, and stored under that name.
void AssemblyGraph::makeReverseComplementNodeIfNecessary(DeBruijnNode * node)
{
    QString oppositeName = getOppositeNodeName(node->getName());

    //NOTE(review): if m_deBruijnGraphNodes is a QMap/QHash, operator[] adds a
    //null entry for a missing name; the insert below then replaces it.
    if (m_deBruijnGraphNodes[oppositeName] != 0)
        return;

    QByteArray oppositeSequence = getReverseComplement(node->getSequence());
    DeBruijnNode * oppositeNode = new DeBruijnNode(oppositeName, node->getReadDepth(),
                                                   oppositeSequence);
    m_deBruijnGraphNodes.insert(oppositeName, oppositeNode);
}
Ejemplo n.º 3
0
std::vector<BarcodePart> DeBruijnNode::getBarcodePartsForThisNodeOrReverseComplement(double scaledNodeLength) const
{
    const DeBruijnNode * positiveNode = this;
    const DeBruijnNode * negativeNode = getReverseComplement();
    if (isNegativeNode())
        std::swap(positiveNode, negativeNode);

    //Look for blast hit parts on both the positive and the negative node,
    //since hits were previously filtered such that startPos < endPos,
    //hence we need to look at both positive and negative nodes to recover all hits.
    std::vector<BarcodePart> returnVector;
    for (size_t i = 0; i < positiveNode->m_barcodes.size(); ++i)
    {
        std::vector<BarcodePart> hitParts = positiveNode->m_barcodes[i]->getBarcodeParts(false, scaledNodeLength);
        returnVector.insert(returnVector.end(), hitParts.begin(), hitParts.end());
    }
    for (size_t i = 0; i < negativeNode->m_barcodes.size(); ++i)
    {
        std::vector<BarcodePart> hitParts = negativeNode->m_barcodes[i]->getBarcodeParts(false, scaledNodeLength);
        returnVector.insert(returnVector.end(), hitParts.begin(), hitParts.end());
    }
    return returnVector;
}
//Populates the graph from a GFA file.
//
//The file is read line by line and each line is split on tab characters:
//  - "S" lines define a node.  Both a "+" node and a "-" node (holding the
//    reverse complement sequence) are created and linked to each other.  The
//    read depth is taken from a "KC:f:" attribute if one is present and is
//    zero otherwise.
//  - "L" lines define an edge.  They are only recorded while reading and are
//    turned into edges once the whole file has been processed.
//  - All other lines are ignored.
//
//Throws the string literal "load error" if an "S" line has fewer than 3
//fields, if an "L" line has fewer than 6 fields, or if the graph has no nodes
//when loading finishes (which includes the case where the file could not be
//opened and the graph was empty beforehand).
void AssemblyGraph::buildDeBruijnGraphFromGfa(QString fullFileName)
{
    m_graphFileType = GFA;

    QFile gfaFile(fullFileName);
    if (gfaFile.open(QIODevice::ReadOnly))
    {
        //Pending links, stored as three parallel vectors: entry i of each
        //vector describes the same link.
        std::vector<QString> linkSourceNames;
        std::vector<QString> linkTargetNames;
        std::vector<int> linkOverlaps;

        //The field separator is the same for every line, so build it once.
        QRegExp tabSeparator("\t");

        QTextStream stream(&gfaFile);
        while (!stream.atEnd())
        {
            //Keep the application's event loop serviced during long loads.
            QApplication::processEvents();

            QStringList fields = stream.readLine().split(tabSeparator);
            if (fields.isEmpty())
                continue;

            QString recordType = fields.at(0);

            //Sequence (node) record
            if (recordType == "S")
            {
                if (fields.size() < 3)
                    throw "load error";

                QString plusName = fields.at(1) + "+";
                QString minusName = fields.at(1) + "-";
                QByteArray forwardSequence = fields.at(2).toLocal8Bit();

                //Scan the optional attributes for a read depth.  Attributes
                //shorter than six characters cannot hold "KC:f:" plus a value
                //and are skipped.  If several match, the last one wins.
                double readDepth = 0.0;
                for (int f = 3; f < fields.size(); ++f)
                {
                    QString attribute = fields.at(f);
                    if (attribute.size() >= 6 && attribute.startsWith("KC:f:"))
                        readDepth = attribute.mid(5).toDouble();
                }

                DeBruijnNode * plusNode = new DeBruijnNode(plusName, readDepth, forwardSequence);
                DeBruijnNode * minusNode = new DeBruijnNode(minusName, readDepth,
                                                            getReverseComplement(forwardSequence));
                plusNode->setReverseComplement(minusNode);
                minusNode->setReverseComplement(plusNode);
                m_deBruijnGraphNodes.insert(plusName, plusNode);
                m_deBruijnGraphNodes.insert(minusName, minusNode);
            }

            //Link (edge) record
            else if (recordType == "L")
            {
                //The nodes a link refers to may be defined later in the file,
                //so the link is only remembered here.
                if (fields.size() < 6)
                    throw "load error";

                //Fields 1 and 3 are node names, fields 2 and 4 their +/- signs.
                linkSourceNames.push_back(fields.at(1) + fields.at(2));
                linkTargetNames.push_back(fields.at(3) + fields.at(4));

                //Field 5 is the overlap CIGAR string.
                linkOverlaps.push_back(getLengthFromCigar(fields.at(5)));
            }
        }

        //Every node is known now, so the recorded links can become edges.
        for (size_t e = 0; e < linkSourceNames.size(); ++e)
            createDeBruijnEdge(linkSourceNames[e], linkTargetNames[e], linkOverlaps[e]);
    }

    if (m_deBruijnGraphNodes.size() == 0)
        throw "load error";
}