//Links every positive node to the node carrying the opposite name (as given
//by getOppositeNodeName) and vice versa, so that each one is registered as
//the other's reverse complement.  Positive nodes with no such partner in the
//node map are left untouched.
void AssemblyGraph::pointEachNodeToItsReverseComplement()
{
    QMapIterator<QString, DeBruijnNode*> i(m_deBruijnGraphNodes);
    while (i.hasNext())
    {
        i.next();
        DeBruijnNode * positiveNode = i.value();

        //Only positive nodes drive the pairing; each pair is wired up in both
        //directions when its positive member is visited.
        if (!positiveNode->isPositiveNode())
            continue;

        //value() is used instead of operator[] on purpose: operator[] would
        //insert a null entry into the node map whenever the partner is
        //missing, whereas value() is a read-only lookup that simply returns a
        //null pointer in that case.
        DeBruijnNode * negativeNode =
                m_deBruijnGraphNodes.value(getOppositeNodeName(positiveNode->getName()));
        if (negativeNode != 0)
        {
            positiveNode->setReverseComplement(negativeNode);
            negativeNode->setReverseComplement(positiveNode);
        }
    }
}
//Builds the graph from a GFA file.
//
//Every "S" line produces two nodes: <name>+ holding the sequence as written
//and <name>- holding its reverse complement.  Every "L" line produces an
//edge, but the edges are only created after the whole file has been read, so
//a link may refer to a segment that appears later in the file.
//
//Throws "load error" (a const char *) if an "S" line has fewer than 3
//tab-separated fields, if an "L" line has fewer than 6, or if the node map is
//empty once loading has finished.
void AssemblyGraph::buildDeBruijnGraphFromGfa(QString fullFileName)
{
    m_graphFileType = GFA;

    QFile gfaFile(fullFileName);
    if (gfaFile.open(QIODevice::ReadOnly))
    {
        //Pending links, stored as three parallel lists (same index = same link).
        std::vector<QString> linkFromNames;
        std::vector<QString> linkToNames;
        std::vector<int> linkOverlaps;

        QTextStream stream(&gfaFile);
        while (!stream.atEnd())
        {
            QApplication::processEvents();

            QStringList fields = stream.readLine().split(QRegExp("\t"));
            if (fields.isEmpty())
                continue;

            const QString recordType = fields.at(0);

            //"S" records describe a sequence (node).
            if (recordType == "S")
            {
                if (fields.size() < 3)
                    throw "load error";

                QString segmentName = fields.at(1);
                QString plusName = segmentName + "+";
                QString minusName = segmentName + "-";

                QByteArray forwardSequence = fields.at(2).toLocal8Bit();
                QByteArray reverseSequence = getReverseComplement(forwardSequence);

                //The read depth comes from a "KC:f:" attribute when one is
                //present (the last one wins); otherwise it stays at zero.
                double depth = 0.0;
                for (int tagIndex = 3; tagIndex < fields.size(); ++tagIndex)
                {
                    QString tag = fields.at(tagIndex);
                    if (tag.size() >= 6 && tag.startsWith("KC:f:"))
                        depth = tag.mid(5).toDouble();
                }

                DeBruijnNode * plusNode = new DeBruijnNode(plusName, depth, forwardSequence);
                DeBruijnNode * minusNode = new DeBruijnNode(minusName, depth, reverseSequence);
                plusNode->setReverseComplement(minusNode);
                minusNode->setReverseComplement(plusNode);

                m_deBruijnGraphNodes.insert(plusName, plusNode);
                m_deBruijnGraphNodes.insert(minusName, minusNode);
            }

            //"L" records describe a link (edge).  They are only recorded here
            //and turned into edges after the whole file has been read, in
            //case the nodes they refer to have not been specified yet.
            else if (recordType == "L")
            {
                if (fields.size() < 6)
                    throw "load error";

                //Fields 1 and 3 are the node names, fields 2 and 4 are the
                //matching +/- signs.
                QString fromName = fields.at(1) + fields.at(2);
                QString toName = fields.at(3) + fields.at(4);
                linkFromNames.push_back(fromName);
                linkToNames.push_back(toName);

                //Field 5 is the CIGAR string describing the node overlap.
                QString overlapCigar = fields.at(5);
                linkOverlaps.push_back(getLengthFromCigar(overlapCigar));
            }
        }

        //All nodes are known now, so the recorded links can become edges.
        const size_t linkCount = linkFromNames.size();
        for (size_t link = 0; link < linkCount; ++link)
        {
            QString fromName = linkFromNames[link];
            QString toName = linkToNames[link];
            int overlapLength = linkOverlaps[link];
            createDeBruijnEdge(fromName, toName, overlapLength);
        }
    }

    if (m_deBruijnGraphNodes.isEmpty())
        throw "load error";
}
//Builds the graph from a LastGraph file.
//
//The third whitespace-separated field of the first line (when present) is
//stored as the k-mer size.  Every "NODE" line is followed by two lines in the
//file, which are read as the node's sequence and its reverse complement
//sequence; the pair becomes nodes <name>+ and <name>-.  Every "ARC" line
//becomes an edge.  After the file is read, all edges are given an exact
//overlap of 0.
//
//Throws "load error" (a const char *) if a NODE line has fewer than 4 fields,
//if an ARC line has fewer than 3, or if the node map is empty once loading
//has finished.
void AssemblyGraph::buildDeBruijnGraphFromLastGraph(QString fullFileName)
{
    m_graphFileType = LAST_GRAPH;

    bool headerPending = true;
    QFile graphFile(fullFileName);
    if (graphFile.open(QIODevice::ReadOnly))
    {
        QTextStream stream(&graphFile);
        while (!stream.atEnd())
        {
            QApplication::processEvents();
            QString currentLine = stream.readLine();

            //The very first line carries the k-mer size in its third field.
            //It is not skipped afterwards: it still goes through the
            //NODE/ARC checks below like any other line.
            if (headerPending)
            {
                headerPending = false;
                QStringList headerFields = currentLine.split(QRegExp("\\s+"));
                if (headerFields.size() > 2)
                    m_kmer = headerFields[2].toInt();
            }

            if (currentLine.startsWith("NODE"))
            {
                QStringList nodeFields = currentLine.split(QRegExp("\\s+"));
                if (nodeFields.size() < 4)
                    throw "load error";

                QString baseName = nodeFields.at(1);
                QString plusName = baseName + "+";
                QString minusName = baseName + "-";

                //Depth is field 3 divided by the node length, or field 3 as
                //is when the length is not positive.
                //NOTE: it is an open question whether this column
                //($COV_SHORT1) is the best one to use.
                int length = nodeFields.at(2).toInt();
                double depth = static_cast<double>(nodeFields.at(3).toInt());
                if (length > 0)
                    depth /= length;

                //The next two lines of the file hold the sequence and then
                //the reverse complement sequence, in that order.
                QByteArray forwardSequence = stream.readLine().toLocal8Bit();
                QByteArray reverseSequence = stream.readLine().toLocal8Bit();

                DeBruijnNode * plusNode = new DeBruijnNode(plusName, depth, forwardSequence);
                DeBruijnNode * minusNode = new DeBruijnNode(minusName, depth, reverseSequence);
                plusNode->setReverseComplement(minusNode);
                minusNode->setReverseComplement(plusNode);

                m_deBruijnGraphNodes.insert(plusName, plusNode);
                m_deBruijnGraphNodes.insert(minusName, minusNode);
            }
            else if (currentLine.startsWith("ARC"))
            {
                QStringList arcFields = currentLine.split(QRegExp("\\s+"));
                if (arcFields.size() < 3)
                    throw "load error";

                QString fromName = convertNormalNumberStringToBandageNodeName(arcFields.at(1));
                QString toName = convertNormalNumberStringToBandageNodeName(arcFields.at(2));
                createDeBruijnEdge(fromName, toName);
            }
        }
        graphFile.close();

        setAllEdgesExactOverlap(0);
    }

    if (m_deBruijnGraphNodes.isEmpty())
        throw "load error";
}