//This function gets the node's sequence. The full parameter only has an effect //for Velvet LastGraph files where the sequences are shifted from their reverse //complement. If full is true and the graph is from Velvet, this function will //extend the sequence using either the reverse complement or upstream nodes. QByteArray DeBruijnNode::getFullSequence() const { if (g_assemblyGraph->m_graphFileType != LAST_GRAPH) return getSequence(); //If the code got here, then we are getting a full sequence from a Velvet //LastGraph graph, so we need to extend the beginning of the sequence. int extensionLength = g_assemblyGraph->m_kmer - 1; //If the node is at least k-1 in length, then the necessary sequence can be //deduced from the reverse complement node. if (getLength() >= extensionLength) { QByteArray revCompSeq = getReverseComplement()->getSequence(); QByteArray endOfRevCompSeq = revCompSeq.right(extensionLength); QByteArray extension = AssemblyGraph::getReverseComplement(endOfRevCompSeq); return extension + getSequence(); } //If the node is not long enough, then we must look in upstream nodes for //the rest of the sequence. else { QByteArray extension = getUpstreamSequence(extensionLength); if (extension.length() < extensionLength) { int additionalBases = extensionLength - extension.length(); QByteArray n; n.fill('N', additionalBases); extension = n + extension; } return extension + getSequence(); } }
//Makes sure the graph contains a node for the reverse complement of the given
//node, creating one if it is missing.
//
//The counterpart's name comes from getOppositeNodeName(). When no non-null
//node is stored under that name, a new node is allocated with the same read
//depth as the given node and the reverse complement of its sequence, and is
//stored in m_deBruijnGraphNodes under the counterpart's name.
//
//This function only adds the node to the map; it does not call
//setReverseComplement() on either node.
void AssemblyGraph::makeReverseComplementNodeIfNecessary(DeBruijnNode * node)
{
    const QString reverseComplementName = getOppositeNodeName(node->getName());

    //value() is a read-only lookup that yields a null pointer for a missing
    //key. operator[] would instead insert a null placeholder entry first,
    //which would be left in the map if constructing the node below threw.
    if (m_deBruijnGraphNodes.value(reverseComplementName) != 0)
        return;

    DeBruijnNode * newNode = new DeBruijnNode(reverseComplementName,
                                              node->getReadDepth(),
                                              getReverseComplement(node->getSequence()));
    m_deBruijnGraphNodes.insert(reverseComplementName, newNode);
}
std::vector<BarcodePart> DeBruijnNode::getBarcodePartsForThisNodeOrReverseComplement(double scaledNodeLength) const { const DeBruijnNode * positiveNode = this; const DeBruijnNode * negativeNode = getReverseComplement(); if (isNegativeNode()) std::swap(positiveNode, negativeNode); //Look for blast hit parts on both the positive and the negative node, //since hits were previously filtered such that startPos < endPos, //hence we need to look at both positive and negative nodes to recover all hits. std::vector<BarcodePart> returnVector; for (size_t i = 0; i < positiveNode->m_barcodes.size(); ++i) { std::vector<BarcodePart> hitParts = positiveNode->m_barcodes[i]->getBarcodeParts(false, scaledNodeLength); returnVector.insert(returnVector.end(), hitParts.begin(), hitParts.end()); } for (size_t i = 0; i < negativeNode->m_barcodes.size(); ++i) { std::vector<BarcodePart> hitParts = negativeNode->m_barcodes[i]->getBarcodeParts(false, scaledNodeLength); returnVector.insert(returnVector.end(), hitParts.begin(), hitParts.end()); } return returnVector; }
//Builds the graph from a GFA file.
//
//The file is read line by line and each line is split on tab characters:
//  * "S" lines define a sequence. A positive node ("<name>+") and a negative
//    node ("<name>-", holding the reverse complement sequence) are created,
//    linked to each other and stored in m_deBruijnGraphNodes. The read depth
//    is taken from a "KC:f:" tag if one is present, otherwise it is zero.
//  * "L" lines define a link. Links are only recorded while reading and are
//    turned into edges after the whole file has been read, so that a link may
//    refer to a sequence that appears later in the file.
//  * All other lines are ignored.
//
//Throws the string literal "load error" when an "S" line has fewer than 3
//fields, when an "L" line has fewer than 6 fields, or when the graph holds no
//nodes afterwards. If the file cannot be opened, nothing is parsed and only
//that final no-nodes check applies.
void AssemblyGraph::buildDeBruijnGraphFromGfa(QString fullFileName)
{
    m_graphFileType = GFA;

    QFile inputFile(fullFileName);
    if (inputFile.open(QIODevice::ReadOnly))
    {
        //Pending links, stored as three parallel lists (same index = same link).
        std::vector<QString> edgeStartingNodeNames;
        std::vector<QString> edgeEndingNodeNames;
        std::vector<int> edgeOverlaps;

        //Fields are separated by a literal tab, so a plain character split is
        //enough; the separator is built once rather than once per line.
        const QChar fieldSeparator = QLatin1Char('\t');
        const QLatin1String readDepthTag("KC:f:");

        QTextStream in(&inputFile);
        while (!in.atEnd())
        {
            QApplication::processEvents();

            const QString line = in.readLine();
            const QStringList lineParts = line.split(fieldSeparator);
            if (lineParts.size() < 1)
                continue;

            const QString recordType = lineParts.at(0);

            //Lines beginning with "S" are sequence (node) lines
            if (recordType == "S")
            {
                if (lineParts.size() < 3)
                    throw "load error";

                const QString nodeName = lineParts.at(1);
                const QString posNodeName = nodeName + "+";
                const QString negNodeName = nodeName + "-";

                const QByteArray sequence = lineParts.at(2).toLocal8Bit();
                const QByteArray revCompSequence = getReverseComplement(sequence);

                //Use the read depth tag if there is one, otherwise zero. A
                //field of fewer than 6 characters cannot hold the 5 character
                //tag plus a value. If several fields carry the tag, the last
                //one wins.
                double nodeReadDepth = 0.0;
                for (int i = 3; i < lineParts.size(); ++i)
                {
                    const QString part = lineParts.at(i);
                    if (part.size() < 6)
                        continue;
                    if (part.startsWith(readDepthTag))
                        nodeReadDepth = part.mid(5).toDouble();
                }

                DeBruijnNode * node = new DeBruijnNode(posNodeName, nodeReadDepth, sequence);
                DeBruijnNode * reverseComplementNode = new DeBruijnNode(negNodeName, nodeReadDepth, revCompSequence);
                node->setReverseComplement(reverseComplementNode);
                reverseComplementNode->setReverseComplement(node);
                m_deBruijnGraphNodes.insert(posNodeName, node);
                m_deBruijnGraphNodes.insert(negNodeName, reverseComplementNode);
            }

            //Lines beginning with "L" are link (edge) lines
            else if (recordType == "L")
            {
                //Edges aren't made now, in case their nodes haven't been read
                //yet. The link is saved and the edge is made after the file
                //has been fully read.
                if (lineParts.size() < 6)
                    throw "load error";

                //Parts 1 and 3 hold the node names and parts 2 and 4 hold the
                //corresponding +/-.
                edgeStartingNodeNames.push_back(lineParts.at(1) + lineParts.at(2));
                edgeEndingNodeNames.push_back(lineParts.at(3) + lineParts.at(4));

                //Part 5 holds the node overlap cigar string
                edgeOverlaps.push_back(getLengthFromCigar(lineParts.at(5)));
            }
        }

        //Create all of the edges
        for (size_t i = 0; i < edgeStartingNodeNames.size(); ++i)
            createDeBruijnEdge(edgeStartingNodeNames[i], edgeEndingNodeNames[i], edgeOverlaps[i]);
    }

    if (m_deBruijnGraphNodes.size() == 0)
        throw "load error";
}