/*
 * Folds the passage markers of `candidate` into `node`.
 *
 * - Markers already on `node` that do not continue into `candidate`
 *   (per goesToNode) get their finish offset increased by the length
 *   of `candidate`.
 * - Markers on `candidate` that do not arrive from `node` (per
 *   comesFromNode) are detached, get their start offset increased by
 *   the length of `node`, are attached to `node`, and their twin's
 *   finish offset is increased by the length of `node`.
 * - Markers on `candidate` that do arrive from `node` are handed to
 *   reconnectPassageMarker.
 *
 * `graph` is not used by this function.
 */
static void concatenateLongReads(Node * node, Node * candidate, Graph * graph)
{
	PassageMarkerI current;
	PassageMarkerI following;

	// Markers that stay on node
	current = getMarker(node);
	while (current != NULL_IDX) {
		if (!goesToNode(current, candidate))
			incrementFinishOffset(current,
					      getNodeLength(candidate));
		current = getNextInNode(current);
	}

	// Markers currently sitting on candidate
	current = getMarker(candidate);
	while (current != NULL_IDX) {
		// Read the successor first: the marker is about to leave
		// candidate's list.
		following = getNextInNode(current);

		if (comesFromNode(current, node)) {
			// The helper receives the address of the cursor,
			// presumably so it can adjust it -- confirm.
			reconnectPassageMarker(current, node, &following);
		} else {
			extractPassageMarker(current);
			incrementStartOffset(current, getNodeLength(node));
			transposePassageMarker(current, node);
			incrementFinishOffset(getTwinMarker(current),
					      getNodeLength(node));
		}

		current = following;
	}
}
/*
 * Moves every passage marker found on `source` over to `bypass`.
 *
 * Each marker is detached, attached to `bypass`, and the finish offset
 * of its twin marker is increased by the length of `bypass`.
 */
static void admitGroupies(Node * source, Node * bypass)
{
	PassageMarkerI current = getMarker(source);
	PassageMarkerI following;

	while (current != NULL_IDX) {
		// Remember the successor before the marker leaves source's list
		following = getNextInNode(current);

		extractPassageMarker(current);
		transposePassageMarker(current, bypass);
		incrementFinishOffset(getTwinMarker(current),
				      getNodeLength(bypass));

		current = following;
	}
}
/*
 * Walks the sequence `tString` k-mer by k-mer (k = getWordLength(graph))
 * and records its path in `graph`.
 *
 * For every k-mer a (node, coord) pair is looked up from exactly one of
 * three sources, tested in this order:
 *   1. category == REFERENCE: findReferenceMapping on (seqID, index),
 *      only if `referenceMappings` is non-NULL;
 *   2. the current annotation applies (uniqueIndex has reached its
 *      position and its sequence ID lies in [-refCount, refCount]):
 *      findReferenceMapping on the annotated reference coordinate;
 *   3. otherwise: the k-mer occurrence table `kmerTable`.
 * A failed lookup sets node to NULL; if a node had already been found
 * earlier (previousNode != NULL) the walk stops there, otherwise the
 * k-mer is simply skipped.
 *
 * For every hit:
 *   - if it directly continues the previous hit (same node, coord ==
 *     previousCoord + 1), the current passage marker is extended when
 *     category / 2 >= CATEGORIES, else coverage counters are incremented;
 *   - otherwise a new passage marker is created and chained (when
 *     category / 2 >= CATEGORIES), or coverage is incremented and, with
 *     `readTracking`, a read start is registered the first time the
 *     node is met in this call; then createArc(previousNode, node) is
 *     called.
 *
 * Parameters:
 *   tString               sequence to thread
 *   kmerTable             k-mer occurrence table used by lookup path 3
 *   graph                 graph being updated
 *   seqID                 ID stored in passage markers / read starts
 *   category              selects REFERENCE handling and marker-vs-coverage
 *   readTracking          enables read-start bookkeeping
 *   double_strand         look up the smaller of word / antiWord
 *   referenceMappings     array searched by findReferenceMapping (may be NULL)
 *   referenceMappingCount passed through to findReferenceMapping
 *   refCount              bound on annotation sequence IDs for path 2
 *   annotations           first annotation of this sequence
 *   annotationCount       number of annotations to consume
 *   second_in_pair        when !double_strand, look up antiWord instead of word
 */
static void threadSequenceThroughGraph(TightString * tString,
				       KmerOccurenceTable * kmerTable,
				       Graph * graph,
				       IDnum seqID, Category category,
				       boolean readTracking,
				       boolean double_strand,
				       ReferenceMapping * referenceMappings,
				       Coordinate referenceMappingCount,
				       IDnum refCount,
				       Annotation * annotations,
				       IDnum annotationCount,
				       boolean second_in_pair)
{
	Kmer word;
	Kmer antiWord;
	Coordinate readNucleotideIndex;
	Coordinate kmerIndex;
	KmerOccurence *kmerOccurence;
	int wordLength = getWordLength(graph);

	PassageMarkerI marker = NULL_IDX;
	PassageMarkerI previousMarker = NULL_IDX;
	Node *node = NULL;
	Node *previousNode = NULL;
	Coordinate coord = 0;
	Coordinate previousCoord = 0;
	Nucleotide nucleotide;
	boolean reversed;

	IDnum refID;
	Coordinate refCoord = 0;
	ReferenceMapping * refMap;
	Annotation * annotation = annotations;
	// index: number of k-mers processed so far (incremented every iteration)
	Coordinate index = 0;
	// uniqueIndex: k-mers processed while no annotation was active
	Coordinate uniqueIndex = 0;
	// annotIndex: k-mers processed inside the current annotation
	Coordinate annotIndex = 0;
	// annotCount: annotations already consumed
	IDnum annotCount = 0;
	// Nodes on which a read start was registered during this call
	SmallNodeList * nodePile = NULL;

	// Neglect any string shorter than WORDLENGTH :
	if (getLength(tString) < wordLength)
		return;

	clearKmer(&word);
	clearKmer(&antiWord);

	// Fill in the initial word :
	// (only wordLength - 1 nucleotides; the main loop pushes the last one)
	for (readNucleotideIndex = 0;
	     readNucleotideIndex < wordLength - 1; readNucleotideIndex++) {
		nucleotide = getNucleotide(readNucleotideIndex, tString);
		pushNucleotide(&word, nucleotide);
		// antiWord is only maintained when it can be looked up
		if (double_strand || second_in_pair) {
#ifdef COLOR
			reversePushNucleotide(&antiWord, nucleotide);
#else
			reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
		}
	}

	// Go through sequence
	while (readNucleotideIndex < getLength(tString)) {
		nucleotide = getNucleotide(readNucleotideIndex++, tString);
		pushNucleotide(&word, nucleotide);
		if (double_strand || second_in_pair) {
#ifdef COLOR
			reversePushNucleotide(&antiWord, nucleotide);
#else
			reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
		}

		// Update annotation if necessary:
		// the current annotation has been fully walked, move to the next
		if (annotCount < annotationCount
		    && annotIndex == getAnnotationLength(annotation)) {
			annotation = getNextAnnotation(annotation);
			annotCount++;
			annotIndex = 0;
		}

		// Search for reference mapping
		if (category == REFERENCE) {
			if (referenceMappings)
				refMap = findReferenceMapping(seqID, index, referenceMappings, referenceMappingCount);
			else
				refMap = NULL;

			if (refMap) {
				node = getNodeInGraph(graph, refMap->nodeID);
				// The coordinate formula depends on the sign of the mapped node ID
				if (refMap->nodeID > 0) {
					coord = refMap->nodeStart + (index - refMap->referenceStart);
				} else {
					coord = getNodeLength(node) - refMap->nodeStart - refMap->length + (index - refMap->referenceStart);
				}
			} else {
				node = NULL;
				// Stop at the first miss that follows a hit
				if (previousNode)
					break;
			}
		}
		// Search for reference-based mapping
		else if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation) && getAnnotSequenceID(annotation) <= refCount && getAnnotSequenceID(annotation) >= -refCount) {
			refID = getAnnotSequenceID(annotation);
			// Positive IDs walk the reference forwards, others backwards
			if (refID > 0)
				refCoord = getStart(annotation) + annotIndex;
			else
				refCoord = getStart(annotation) - annotIndex;

			refMap = findReferenceMapping(refID, refCoord, referenceMappings, referenceMappingCount);
			// If success
			if (refMap) {
				if (refID > 0) {
					node = getNodeInGraph(graph, refMap->nodeID);
					if (refMap->nodeID > 0) {
						coord = refMap->nodeStart + (refCoord - refMap->referenceStart);
					} else {
						coord = getNodeLength(node) - refMap->nodeStart - refMap->length + (refCoord - refMap->referenceStart);
					}
				} else {
					// Non-positive refID: use the node with the negated ID
					node = getNodeInGraph(graph, -refMap->nodeID);
					if (refMap->nodeID > 0) {
						coord = getNodeLength(node) - refMap->nodeStart - (refCoord - refMap->referenceStart) - 1;
					} else {
						coord = refMap->nodeStart + refMap->length - (refCoord - refMap->referenceStart) - 1;
					}
				}
			} else {
				node = NULL;
				// Stop at the first miss that follows a hit
				if (previousNode)
					break;
			}
		}
		// Search in table
		else {
			reversed = false;
			if (double_strand) {
				// Look up whichever of word / antiWord compares smaller
				if (compareKmers(&word, &antiWord) <= 0) {
					kmerOccurence = findKmerInKmerOccurenceTable(&word, kmerTable);
				} else {
					kmerOccurence = findKmerInKmerOccurenceTable(&antiWord, kmerTable);
					reversed = true;
				}
			} else {
				// Single strand: word, unless this is the second read of a pair
				if (!second_in_pair) {
					kmerOccurence = findKmerInKmerOccurenceTable(&word, kmerTable);
				} else {
					kmerOccurence = findKmerInKmerOccurenceTable(&antiWord, kmerTable);
					reversed = true;
				}
			}

			if (kmerOccurence) {
				if (!reversed) {
					node = getNodeInGraph(graph, getKmerOccurenceNodeID(kmerOccurence));
					coord = getKmerOccurencePosition(kmerOccurence);
				} else {
					// antiWord hit: negated node ID, coordinate counted from the other end
					node = getNodeInGraph(graph, -getKmerOccurenceNodeID(kmerOccurence));
					coord = getNodeLength(node) - getKmerOccurencePosition(kmerOccurence) - 1;
				}
			} else {
				node = NULL;
				// Stop at the first miss that follows a hit
				if (previousNode)
					break;
			}
		}

		// Increment positions
		if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation))
			annotIndex++;
		else
			uniqueIndex++;

		// Fill in graph
		if (node) {
#ifdef OPENMP
			lockNode(node);
#endif
			// Index of the first nucleotide of the current k-mer
			kmerIndex = readNucleotideIndex - wordLength;

			// Direct continuation of the previous hit inside the same node
			if (previousNode == node && previousCoord == coord - 1) {
				if (category / 2 >= CATEGORIES) {
					// Stretch the current marker by one position
					setPassageMarkerFinish(marker, kmerIndex + 1);
					setFinishOffset(marker, getNodeLength(node) - coord - 1);
				} else {
#ifndef SINGLE_COV_CAT
					incrementVirtualCoverage(node, category / 2, 1);
					incrementOriginalVirtualCoverage(node, category / 2, 1);
#else
					incrementVirtualCoverage(node, 1);
#endif
				}
#ifdef OPENMP
				unLockNode(node);
#endif
			} else {
				// New node, or a jump inside the same node
				if (category / 2 >= CATEGORIES) {
					marker = newPassageMarker(seqID, kmerIndex, kmerIndex + 1, coord, getNodeLength (node) - coord - 1);
					transposePassageMarker(marker, node);
					connectPassageMarkers (previousMarker, marker, graph);
					previousMarker = marker;
				} else {
					if (readTracking) {
						// Register a read start only the first time this
						// call meets the node; afterwards blur instead
						if (!isNodeMemorized(node, nodePile)) {
							addReadStart(node, seqID, coord, graph, kmerIndex);
							memorizeNode(node, &nodePile);
						} else {
							blurLastShortReadMarker (node, graph);
						}
					}
#ifndef SINGLE_COV_CAT
					incrementVirtualCoverage(node, category / 2, 1);
					incrementOriginalVirtualCoverage(node, category / 2, 1);
#else
					incrementVirtualCoverage(node, 1);
#endif
				}
				// NOTE(review): the lock taken by lockNode(node) above is
				// still held here; presumably lockTwoNodes expects `node`
				// to be locked already and unLockTwoNodes releases it --
				// confirm against their definitions.
				// previousNode is NULL on the first hit of the sequence.
#ifdef OPENMP
				lockTwoNodes(node, previousNode);
#endif
				createArc(previousNode, node, graph);
#ifdef OPENMP
				unLockTwoNodes(node, previousNode);
#endif
			}

			previousNode = node;
			previousCoord = coord;
		}
		index++;
	}

	// Release the per-call list of nodes that received a read start
	if (readTracking && category / 2 < CATEGORIES)
		unMemorizeNodes(&nodePile);
}
static void threadSequenceThroughGraph(TightString * tString, KmerOccurenceTable * kmerOccurences, Graph * graph, IDnum seqID, Category category, boolean readTracking, boolean double_strand) { Kmer word; Kmer antiWord; Coordinate readNucleotideIndex; Coordinate kmerIndex; KmerOccurence *kmerOccurence; int wordLength = getWordLength(graph); PassageMarker *marker = NULL; PassageMarker *previousMarker = NULL; Node *node; Node *previousNode = NULL; Coordinate coord; Coordinate previousCoord = 0; Nucleotide nucleotide; clearKmer(&word); clearKmer(&antiWord); // Neglect any string shorter than WORDLENGTH : if (getLength(tString) < wordLength) return; // Fill in the initial word : for (readNucleotideIndex = 0; readNucleotideIndex < wordLength - 1; readNucleotideIndex++) { nucleotide = getNucleotide(readNucleotideIndex, tString); pushNucleotide(&word, nucleotide); if (double_strand) { #ifdef COLOR reversePushNucleotide(&antiWord, nucleotide); #else reversePushNucleotide(&antiWord, 3 - nucleotide); #endif } } // Go through sequence while (readNucleotideIndex < getLength(tString)) { nucleotide = getNucleotide(readNucleotideIndex++, tString); pushNucleotide(&word, nucleotide); if (double_strand) { #ifdef COLOR reversePushNucleotide(&antiWord, nucleotide); #else reversePushNucleotide(&antiWord, 3 - nucleotide); #endif } // Search in table if ((!double_strand || compareKmers(&word, &antiWord) <= 0) && (kmerOccurence = findKmerOccurenceInSortedTable(&word, kmerOccurences))) { node = getNodeInGraph(graph, kmerOccurence->nodeID); coord = kmerOccurence->position; } else if ((double_strand && compareKmers(&word, &antiWord) > 0) && (kmerOccurence = findKmerOccurenceInSortedTable(&antiWord, kmerOccurences))) { node = getNodeInGraph(graph, -kmerOccurence->nodeID); coord = getNodeLength(node) - kmerOccurence->position - 1; } else { node = NULL; if (previousNode) { break; } } // Fill in graph if (node) { kmerIndex = readNucleotideIndex - wordLength; if (previousNode == node && 
previousCoord == coord - 1) { if (category / 2 >= CATEGORIES) { setPassageMarkerFinish(marker, kmerIndex + 1); setFinishOffset(marker, getNodeLength(node) - coord - 1); } else { incrementVirtualCoverage(node, category / 2, 1); incrementOriginalVirtualCoverage (node, category / 2, 1); } } else { if (category / 2 >= CATEGORIES) { marker = newPassageMarker(seqID, kmerIndex, kmerIndex + 1, coord, getNodeLength (node) - coord - 1); transposePassageMarker(marker, node); connectPassageMarkers (previousMarker, marker, graph); previousMarker = marker; } else { if (readTracking) { if (!getNodeStatus(node)) { addReadStart(node, seqID, coord, graph, kmerIndex); setSingleNodeStatus (node, true); memorizeNode(node); } else { blurLastShortReadMarker (node, graph); } } incrementVirtualCoverage(node, category / 2, 1); incrementOriginalVirtualCoverage (node, category / 2, 1); } createArc(previousNode, node, graph); } previousNode = node; previousCoord = coord; } } unlockMemorizedNodes(); }
// Replaces two consecutive nodes into a single equivalent node // The extra memory is freed void concatenateNodes(Node * nodeA, Node * nodeB, Graph * graph) { PassageMarkerI marker, tmpMarker; Node *twinA = getTwinNode(nodeA); Node *twinB = getTwinNode(nodeB); Arc *arc; Category cat; // Arc management: // Freeing useless arcs while (getArc(nodeA) != NULL) destroyArc(getArc(nodeA), graph); // Correct arcs for (arc = getArc(nodeB); arc != NULL; arc = getNextArc(arc)) { if (getDestination(arc) != twinB) createAnalogousArc(nodeA, getDestination(arc), arc, graph); else createAnalogousArc(nodeA, twinA, arc, graph); } // Passage marker management in node A: for (marker = getMarker(nodeA); marker != NULL_IDX; marker = getNextInNode(marker)) if (isTerminal(marker)) incrementFinishOffset(marker, getNodeLength(nodeB)); // Swapping new born passageMarkers from B to A for (marker = getMarker(nodeB); marker != NULL_IDX; marker = tmpMarker) { tmpMarker = getNextInNode(marker); if (isInitial(marker) || getNode(getPreviousInSequence(marker)) != nodeA) { extractPassageMarker(marker); transposePassageMarker(marker, nodeA); incrementFinishOffset(getTwinMarker(marker), getNodeLength(nodeA)); } else disconnectNextPassageMarker(getPreviousInSequence (marker), graph); } // Read starts concatenateReadStarts(nodeA, nodeB, graph); // Gaps appendNodeGaps(nodeA, nodeB, graph); // Descriptor management (node) appendDescriptors(nodeA, nodeB); // Update uniqueness: setUniqueness(nodeA, getUniqueness(nodeA) || getUniqueness(nodeB)); // Update virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementVirtualCoverage(nodeA, cat, getVirtualCoverage(nodeB, cat)); // Update original virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementOriginalVirtualCoverage(nodeA, cat, getOriginalVirtualCoverage (nodeB, cat)); // Freeing gobbled node destroyNode(nodeB, graph); }
// Replaces two consecutive nodes into a single equivalent node // The extra memory is freed void concatenateStringOfNodes(Node * nodeA, Graph * graph) { Node *twinA = getTwinNode(nodeA); Node * nodeB = nodeA; Node * twinB; Node *currentNode, *nextNode; Coordinate totalLength = 0; PassageMarkerI marker, tmpMarker; Arc *arc; Category cat; while (simpleArcCount(nodeB) == 1 && simpleArcCount(getTwinNode (getDestination(getArc(nodeB)))) == 1 && getDestination(getArc(nodeB)) != getTwinNode(nodeB) && getDestination(getArc(nodeB)) != nodeA) { totalLength += getNodeLength(nodeB); nodeB = getDestination(getArc(nodeB)); } twinB = getTwinNode(nodeB); totalLength += getNodeLength(nodeB); reallocateNodeDescriptor(nodeA, totalLength); currentNode = nodeA; while (currentNode != nodeB) { currentNode = getDestination(getArc(currentNode)); // Passage marker management in node A: for (marker = getMarker(nodeA); marker != NULL_IDX; marker = getNextInNode(marker)) if (getNode(getNextInSequence(marker)) != currentNode) incrementFinishOffset(marker, getNodeLength(currentNode)); // Swapping new born passageMarkers from B to A for (marker = getMarker(currentNode); marker != NULL_IDX; marker = tmpMarker) { tmpMarker = getNextInNode(marker); if (isInitial(marker) || getNode(getPreviousInSequence(marker)) != nodeA) { extractPassageMarker(marker); transposePassageMarker(marker, nodeA); incrementFinishOffset(getTwinMarker(marker), getNodeLength(nodeA)); } else disconnectNextPassageMarker(getPreviousInSequence (marker), graph); } // Read starts concatenateReadStarts(nodeA, currentNode, graph); // Gaps appendNodeGaps(nodeA, currentNode, graph); // Update uniqueness: setUniqueness(nodeA, getUniqueness(nodeA) || getUniqueness(currentNode)); // Update virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementVirtualCoverage(nodeA, cat, getVirtualCoverage(currentNode, cat)); // Update original virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementOriginalVirtualCoverage(nodeA, cat, 
getOriginalVirtualCoverage (currentNode, cat)); // Descriptor management (node) directlyAppendDescriptors(nodeA, currentNode, totalLength); } // Correct arcs for (arc = getArc(nodeB); arc != NULL; arc = getNextArc(arc)) { if (getDestination(arc) != twinB) createAnalogousArc(nodeA, getDestination(arc), arc, graph); else createAnalogousArc(nodeA, twinA, arc, graph); } // Freeing gobbled nodes currentNode = getTwinNode(nodeB); while (currentNode != getTwinNode(nodeA)) { arc = getArc(currentNode); nextNode = getDestination(arc); destroyNode(currentNode, graph); currentNode = nextNode; } }