static void concatenateLongReads(Node * node, Node * candidate, Graph * graph) { PassageMarkerI marker, tmpMarker; // Passage marker management in node: for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) { if (!goesToNode(marker, candidate)) incrementFinishOffset(marker, getNodeLength(candidate)); } // Swapping new born passageMarkers from candidate to node for (marker = getMarker(candidate); marker != NULL_IDX; marker = tmpMarker) { tmpMarker = getNextInNode(marker); if (!comesFromNode(marker, node)) { extractPassageMarker(marker); incrementStartOffset(marker, getNodeLength(node)); transposePassageMarker(marker, node); incrementFinishOffset(getTwinMarker(marker), getNodeLength(node)); } else { reconnectPassageMarker(marker, node, &tmpMarker); } } }
static void computePartialReadToNodeMapping(IDnum nodeID, ReadOccurence ** readNodes, IDnum * readNodeCounts, boolean * readMarker, ReadSet * reads) { ShortReadMarker *shortMarker; IDnum index, readIndex; ReadOccurence *readArray, *readOccurence; Node *node = getNodeInGraph(graph, nodeID); ShortReadMarker *nodeArray = getNodeReads(node, graph); IDnum nodeReadCount = getNodeReadCount(node, graph); PassageMarkerI marker; for (index = 0; index < nodeReadCount; index++) { shortMarker = getShortReadMarkerAtIndex(nodeArray, index); readIndex = getShortReadMarkerID(shortMarker); readArray = readNodes[readIndex]; readOccurence = &readArray[readNodeCounts[readIndex]]; readOccurence->nodeID = nodeID; readOccurence->position = getShortReadMarkerPosition(shortMarker); readOccurence->offset = getShortReadMarkerOffset(shortMarker); readNodeCounts[readIndex]++; } for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) { readIndex = getPassageMarkerSequenceID(marker); if (readIndex <= 0 || reads->categories[readIndex - 1] == REFERENCE) continue; if (!readMarker[readIndex]) { readArray = readNodes[readIndex]; readOccurence = &readArray[readNodeCounts[readIndex]]; readOccurence->nodeID = nodeID; readOccurence->position = getStartOffset(marker); readOccurence->offset = getPassageMarkerStart(marker); readNodeCounts[readIndex]++; readMarker[readIndex] = true; } else { readArray = readNodes[readIndex]; readOccurence = &readArray[readNodeCounts[readIndex] - 1]; readOccurence->position = -1; readOccurence->offset = -1; } } for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) { readIndex = getPassageMarkerSequenceID(marker); if (readIndex > 0) readMarker[readIndex] = false; } }
static IDnum *computeReadToNodeCounts() { IDnum readIndex, nodeIndex; IDnum maxNodeIndex = 2 * nodeCount(graph) + 1; IDnum maxReadIndex = sequenceCount(graph) + 1; IDnum *readNodeCounts = callocOrExit(maxReadIndex, IDnum); boolean *readMarker = callocOrExit(maxReadIndex, boolean); ShortReadMarker *nodeArray, *shortMarker; PassageMarkerI marker; Node *node; IDnum nodeReadCount; //puts("Computing read to node mapping array sizes"); for (nodeIndex = 0; nodeIndex < maxNodeIndex; nodeIndex++) { node = getNodeInGraph(graph, nodeIndex - nodeCount(graph)); if (node == NULL) continue; // Short reads if (readStartsAreActivated(graph)) { nodeArray = getNodeReads(node, graph); nodeReadCount = getNodeReadCount(node, graph); for (readIndex = 0; readIndex < nodeReadCount; readIndex++) { shortMarker = getShortReadMarkerAtIndex(nodeArray, readIndex); readNodeCounts[getShortReadMarkerID (shortMarker)]++; } } // Long reads for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) { readIndex = getPassageMarkerSequenceID(marker); if (readIndex < 0) continue; if (readMarker[readIndex]) continue; readNodeCounts[readIndex]++; readMarker[readIndex] = true; } // Clean up marker array for (marker = getMarker(node); marker != NULL_IDX; marker = getNextInNode(marker)) { readIndex = getPassageMarkerSequenceID(marker); if (readIndex > 0) readMarker[readIndex] = false; } } free(readMarker); return readNodeCounts; }
static void projectFromNode(IDnum nodeID, ReadOccurence ** readNodes, IDnum * readNodeCounts, IDnum * readPairs, Category * cats, boolean * dubious, Coordinate * lengths) { IDnum index; ShortReadMarker *nodeArray, *shortMarker; PassageMarker *marker; Node *node; IDnum nodeReadCount; node = getNodeInGraph(graph, nodeID); if (node == NULL || !getUniqueness(node)) return; nodeArray = getNodeReads(node, graph); nodeReadCount = getNodeReadCount(node, graph); for (index = 0; index < nodeReadCount; index++) { shortMarker = getShortReadMarkerAtIndex(nodeArray, index); if (dubious[getShortReadMarkerID(shortMarker) - 1]) continue; projectFromShortRead(node, shortMarker, readPairs, cats, readNodes, readNodeCounts, lengths); } for (marker = getMarker(node); marker != NULL; marker = getNextInNode(marker)) { if (getPassageMarkerSequenceID(marker) > 0) projectFromLongRead(node, marker, readPairs, cats, readNodes, readNodeCounts, lengths); } }
// DEBUG void checkNode(Node* node) { PassageMarkerI marker1 = getMarker(node); if (marker1 == NULL_IDX) return; PassageMarkerI marker2 = getNextInNode(marker1); if (marker2 == NULL_IDX) return; if (getStartOffset(marker1) == getStartOffset(marker2)) abort(); if (getFinishOffset(marker1) == getFinishOffset(marker2)) abort(); printf(">>>> Node %li\n", (long) getNodeID(node)); printf("Marker1: %li - %li > %li (%li) \n", (long) getStartOffset(marker1), (long) getPassageMarkerLength(marker1), (long) (getNodeLength(node) - getFinishOffset(marker1)), (long) getFinishOffset(marker1)); printf("%s\n", readPassageMarker(marker1)); printf("Marker2: %li - %li > %li (%li) \n", (long) getStartOffset(marker2), (long) getPassageMarkerLength(marker2), (long) (getNodeLength(node) - getFinishOffset(marker2)), (long) getFinishOffset(marker2)); printf("%s\n", readPassageMarker(marker2)); if (getStartOffset(marker1) < getNodeLength(node) - getFinishOffset(marker2) && getStartOffset(marker2) < getNodeLength(node) - getFinishOffset(marker1)) { //abort(); ; } }
static void adjustLongReads(Node * target, Coordinate nodeLength) { PassageMarkerI marker; for (marker = getMarker(target); marker != NULL_IDX; marker = getNextInNode(marker)) incrementFinishOffset(marker, nodeLength); }
static void trimLongReadTips() { IDnum index; Node *node; PassageMarkerI marker, next; velvetLog("Trimming read tips\n"); for (index = 1; index <= nodeCount(graph); index++) { node = getNodeInGraph(graph, index); if (getUniqueness(node)) continue; for (marker = getMarker(node); marker != NULL_IDX; marker = next) { next = getNextInNode(marker); if (!isInitial(marker) && !isTerminal(marker)) continue; if (isTerminal(marker)) marker = getTwinMarker(marker); while (!getUniqueness(getNode(marker))) { if (next != NULL_IDX && (marker == next || marker == getTwinMarker(next))) next = getNextInNode(next); if (getNextInSequence(marker) != NULL_IDX) { marker = getNextInSequence(marker); destroyPassageMarker (getPreviousInSequence (marker)); } else { destroyPassageMarker(marker); break; } } } } }
static void admitGroupies(Node * source, Node * bypass) { PassageMarkerI marker, tmpMarker; for (marker = getMarker(source); marker != NULL_IDX; marker = tmpMarker) { tmpMarker = getNextInNode(marker); extractPassageMarker(marker); transposePassageMarker(marker, bypass); incrementFinishOffset(getTwinMarker(marker), getNodeLength(bypass)); } }
static void updateMembers(Node * bypass, Node * nextNode) { PassageMarkerI marker, next, tmp; Coordinate nextLength = getNodeLength(nextNode); // Update marker + arc info for (marker = getMarker(bypass); marker != NULL_IDX; marker = tmp) { tmp = getNextInNode(marker); if (!isTerminal(marker) && getNode(getNextInSequence(marker)) == nextNode) { // Marker steps right into target next = getNextInSequence(marker); disconnectNextPassageMarker(marker, graph); destroyPassageMarker(next); } else if (getUniqueness(nextNode) && goesToNode(marker, nextNode)) { // Marker goes indirectly to target while (getNode(getNextInSequence(marker)) != nextNode) { next = getNextInSequence(marker); disconnectNextPassageMarker(marker, graph); destroyPassageMarker(next); } next = getNextInSequence(marker); disconnectNextPassageMarker(marker, graph); destroyPassageMarker(next); } else if (!isTerminal(marker) && getFinishOffset(marker) == 0) { // Marker goes somewhere else than to target next = getNextInSequence(marker); incrementFinishOffset(marker, nextLength); } else { // Marker goes nowhere incrementFinishOffset(marker, nextLength); } } }
static void reconnectPassageMarker(PassageMarkerI marker, Node * node, PassageMarkerI * ptr) { PassageMarkerI current; PassageMarkerI next = getNextInSequence(marker); PassageMarkerI tmpMarker; for (current = marker; getNode(current) != node; current = getPreviousInSequence(current)); setPreviousInSequence(current, next); concatenatePassageMarkers(current, marker); // Removing node and all intermediaries while (marker != current) { tmpMarker = getPreviousInSequence(marker); if (*ptr == marker || *ptr == getTwinMarker(marker)) *ptr = getNextInNode(*ptr); setNextInSequence(marker, NULL_IDX); setPreviousInSequence(NULL_IDX, marker); destroyPassageMarker(marker); marker = tmpMarker; } }
// Replaces two consecutive nodes into a single equivalent node // The extra memory is freed void concatenateNodes(Node * nodeA, Node * nodeB, Graph * graph) { PassageMarkerI marker, tmpMarker; Node *twinA = getTwinNode(nodeA); Node *twinB = getTwinNode(nodeB); Arc *arc; Category cat; // Arc management: // Freeing useless arcs while (getArc(nodeA) != NULL) destroyArc(getArc(nodeA), graph); // Correct arcs for (arc = getArc(nodeB); arc != NULL; arc = getNextArc(arc)) { if (getDestination(arc) != twinB) createAnalogousArc(nodeA, getDestination(arc), arc, graph); else createAnalogousArc(nodeA, twinA, arc, graph); } // Passage marker management in node A: for (marker = getMarker(nodeA); marker != NULL_IDX; marker = getNextInNode(marker)) if (isTerminal(marker)) incrementFinishOffset(marker, getNodeLength(nodeB)); // Swapping new born passageMarkers from B to A for (marker = getMarker(nodeB); marker != NULL_IDX; marker = tmpMarker) { tmpMarker = getNextInNode(marker); if (isInitial(marker) || getNode(getPreviousInSequence(marker)) != nodeA) { extractPassageMarker(marker); transposePassageMarker(marker, nodeA); incrementFinishOffset(getTwinMarker(marker), getNodeLength(nodeA)); } else disconnectNextPassageMarker(getPreviousInSequence (marker), graph); } // Read starts concatenateReadStarts(nodeA, nodeB, graph); // Gaps appendNodeGaps(nodeA, nodeB, graph); // Descriptor management (node) appendDescriptors(nodeA, nodeB); // Update uniqueness: setUniqueness(nodeA, getUniqueness(nodeA) || getUniqueness(nodeB)); // Update virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementVirtualCoverage(nodeA, cat, getVirtualCoverage(nodeB, cat)); // Update original virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementOriginalVirtualCoverage(nodeA, cat, getOriginalVirtualCoverage (nodeB, cat)); // Freeing gobbled node destroyNode(nodeB, graph); }
// Replaces two consecutive nodes into a single equivalent node // The extra memory is freed void concatenateStringOfNodes(Node * nodeA, Graph * graph) { Node *twinA = getTwinNode(nodeA); Node * nodeB = nodeA; Node * twinB; Node *currentNode, *nextNode; Coordinate totalLength = 0; PassageMarkerI marker, tmpMarker; Arc *arc; Category cat; while (simpleArcCount(nodeB) == 1 && simpleArcCount(getTwinNode (getDestination(getArc(nodeB)))) == 1 && getDestination(getArc(nodeB)) != getTwinNode(nodeB) && getDestination(getArc(nodeB)) != nodeA) { totalLength += getNodeLength(nodeB); nodeB = getDestination(getArc(nodeB)); } twinB = getTwinNode(nodeB); totalLength += getNodeLength(nodeB); reallocateNodeDescriptor(nodeA, totalLength); currentNode = nodeA; while (currentNode != nodeB) { currentNode = getDestination(getArc(currentNode)); // Passage marker management in node A: for (marker = getMarker(nodeA); marker != NULL_IDX; marker = getNextInNode(marker)) if (getNode(getNextInSequence(marker)) != currentNode) incrementFinishOffset(marker, getNodeLength(currentNode)); // Swapping new born passageMarkers from B to A for (marker = getMarker(currentNode); marker != NULL_IDX; marker = tmpMarker) { tmpMarker = getNextInNode(marker); if (isInitial(marker) || getNode(getPreviousInSequence(marker)) != nodeA) { extractPassageMarker(marker); transposePassageMarker(marker, nodeA); incrementFinishOffset(getTwinMarker(marker), getNodeLength(nodeA)); } else disconnectNextPassageMarker(getPreviousInSequence (marker), graph); } // Read starts concatenateReadStarts(nodeA, currentNode, graph); // Gaps appendNodeGaps(nodeA, currentNode, graph); // Update uniqueness: setUniqueness(nodeA, getUniqueness(nodeA) || getUniqueness(currentNode)); // Update virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementVirtualCoverage(nodeA, cat, getVirtualCoverage(currentNode, cat)); // Update original virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementOriginalVirtualCoverage(nodeA, cat, getOriginalVirtualCoverage (currentNode, cat)); // Descriptor management (node) directlyAppendDescriptors(nodeA, currentNode, totalLength); } // Correct arcs for (arc = getArc(nodeB); arc != NULL; arc = getNextArc(arc)) { if (getDestination(arc) != twinB) createAnalogousArc(nodeA, getDestination(arc), arc, graph); else createAnalogousArc(nodeA, twinA, arc, graph); } // Freeing gobbled nodes currentNode = getTwinNode(nodeB); while (currentNode != getTwinNode(nodeA)) { arc = getArc(currentNode); nextNode = getDestination(arc); destroyNode(currentNode, graph); currentNode = nextNode; } }
static boolean uniqueNodesConnect(Node * startingNode) { Node *destination = NULL; PassageMarkerI startMarker, currentMarker; RBConnection *newList; RBConnection *list = NULL; boolean multipleHits = false; if (arcCount(startingNode) == 0) return false; if (getMarker(startingNode) == NULL_IDX) return false; dbgCounter++; // Checking for multiple destinations for (startMarker = getMarker(startingNode); startMarker != NULL_IDX; startMarker = getNextInNode(startMarker)) { if (getFinishOffset(startMarker) > 2 * getWordLength(graph)) continue; for (currentMarker = getNextInSequence(startMarker); currentMarker != NULL_IDX; currentMarker = getNextInSequence(currentMarker)) { if (!getUniqueness(getNode(currentMarker))) { continue; } else if (getNodeStatus(getNode(currentMarker))) { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; for (newList = list; newList != NULL; newList = newList->next) { if (newList->node == getNode(currentMarker)) { newList->multiplicity++; break; } } if (newList == NULL) abort(); break; } else { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; setSingleNodeStatus(getNode(currentMarker), true); newList = allocateRBConnection(); newList->node = getNode(currentMarker); newList->multiplicity = 1; newList->marker = startMarker; newList->next = list; list = newList; break; } } } while (list != NULL) { newList = list; list = newList->next; setSingleNodeStatus(newList->node, false); if (newList->multiplicity >= MULTIPLICITY_CUTOFF) { if (destination == NULL) { destination = newList->node; path = newList->marker; } else if (destination != newList->node) multipleHits = true; } deallocateRBConnection(newList); } if (multipleHits) { multCounter++; setUniqueness(startingNode, false); return false; } if (destination == NULL || destination == startingNode || destination == getTwinNode(startingNode)) { nullCounter++; return false; } // Check for reciprocity for (startMarker = getMarker(getTwinNode(destination)); startMarker != NULL_IDX; startMarker = getNextInNode(startMarker)) { if (getFinishOffset(startMarker) > 2 * getWordLength(graph)) continue; for (currentMarker = getNextInSequence(startMarker); currentMarker != NULL_IDX; currentMarker = getNextInSequence(currentMarker)) { if (!getUniqueness(getNode(currentMarker))) { continue; } else if (getNodeStatus(getNode(currentMarker))) { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; for (newList = list; newList != NULL; newList = newList->next) { if (newList->node == getNode(currentMarker)) { newList->multiplicity++; break; } } if (newList == NULL) abort(); break; } else { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; setSingleNodeStatus(getNode(currentMarker), true); newList = allocateRBConnection(); newList->node = getNode(currentMarker); newList->multiplicity = 1; newList->next = list; list = newList; break; } } } while (list != NULL) { newList = list; list = newList->next; setSingleNodeStatus(newList->node, false); if (newList->multiplicity >= MULTIPLICITY_CUTOFF && newList->node != getTwinNode(startingNode)) multipleHits = true; deallocateRBConnection(newList); } if (multipleHits) { multCounter++; setUniqueness(destination, false); return false; } // Aligning long reads to each other: // TODO // Merge pairwise alignments and produce consensus // TODO return true; }