/*
 * Allocates a connection leaving node nodeID towards node node2ID, stores the
 * supplied counts, distance and variance in it, and pushes it at the head of
 * the doubly-linked list kept in scaffold[nodeID + nodeCount(graph)].
 *
 * If the destination node is flagged unique, createTwinConnection(node2ID,
 * nodeID, ...) is invoked for the new connection; otherwise its twin pointer
 * is set to NULL.
 *
 * Returns the freshly allocated connection.
 */
Connection *createNewConnection(IDnum nodeID, IDnum node2ID, IDnum direct_count,
				IDnum paired_count, Coordinate distance,
				double variance)
{
	Node *target = getNodeInGraph(graph, node2ID);
	IDnum slot = nodeID + nodeCount(graph);
	Connection *fresh = allocateConnection();
	Connection *formerHead = scaffold[slot];

	// Payload
	fresh->destination = target;
	fresh->direct_count = direct_count;
	fresh->paired_count = paired_count;
	fresh->distance = distance;
	fresh->variance = variance;

	// Push at the head of the list for this slot
	fresh->previous = NULL;
	fresh->next = formerHead;
	if (formerHead != NULL)
		formerHead->previous = fresh;
	scaffold[slot] = fresh;

	// Only connections to unique destinations are paired with a twin
	if (getUniqueness(target))
		createTwinConnection(node2ID, nodeID, fresh);
	else
		fresh->twin = NULL;

	return fresh;
}
/*
 * Scans the markedNodes list for the unique node, other than `node`, whose
 * entry in localScaffold has the smallest non-negative distance.
 *
 * On return *oppositeNode is that node (NULL if none qualifies) and
 * *distance its recorded distance (0 if none qualifies).
 */
static void findOppositeNode(Node * node, Node ** oppositeNode,
			     Coordinate * distance)
{
	NodeList *cursor;

	*oppositeNode = NULL;
	*distance = 0;

	for (cursor = markedNodes; cursor != NULL; cursor = cursor->next) {
		Node *candidate = cursor->node;
		MiniConnection *mini =
		    &localScaffold[getNodeID(candidate) + nodeCount(graph)];

		if (candidate == node
		    || !getUniqueness(candidate)
		    || mini->distance < 0)
			continue;

		// Keep the closest candidate seen so far
		if (*oppositeNode == NULL || mini->distance < *distance) {
			*oppositeNode = candidate;
			*distance = mini->distance;
		}
	}
}
/*
 * Projects every read attached to node nodeID, provided that node exists and
 * is flagged unique.
 *
 * Short reads whose entry in `dubious` (indexed by read ID - 1) is set are
 * skipped; the others go through projectFromShortRead(). Passage markers
 * with a strictly positive sequence ID go through projectFromLongRead().
 */
static void projectFromNode(IDnum nodeID, ReadOccurence ** readNodes,
			    IDnum * readNodeCounts, IDnum * readPairs,
			    Category * cats, boolean * dubious,
			    Coordinate * lengths)
{
	Node *origin = getNodeInGraph(graph, nodeID);
	ShortReadMarker *reads;
	ShortReadMarker *read;
	PassageMarker *longRead;
	IDnum readCount;
	IDnum i;

	if (origin == NULL || !getUniqueness(origin))
		return;

	// Short reads
	reads = getNodeReads(origin, graph);
	readCount = getNodeReadCount(origin, graph);
	for (i = 0; i < readCount; i++) {
		read = getShortReadMarkerAtIndex(reads, i);
		if (!dubious[getShortReadMarkerID(read) - 1])
			projectFromShortRead(origin, read, readPairs, cats,
					     readNodes, readNodeCounts,
					     lengths);
	}

	// Long reads (passage markers)
	longRead = getMarker(origin);
	while (longRead != NULL) {
		if (getPassageMarkerSequenceID(longRead) > 0)
			projectFromLongRead(origin, longRead, readPairs, cats,
					    readNodes, readNodeCounts,
					    lengths);
		longRead = getNextInNode(longRead);
	}
}
/*
 * Drops from locus->contigs every node whose status is false and packs the
 * survivors, in their original order, into a newly allocated array.
 *
 * longContigCount is decremented for each dropped node that is flagged
 * unique. When nothing is dropped the locus is left untouched.
 */
void renumberLocusNodes(Locus * locus)
{
	IDnum slot;
	IDnum dropped = 0;
	IDnum kept = 0;
	Node *contig;
	Node **survivors;

	// Pass 1: blank out the entries to discard
	for (slot = 0; slot < locus->contigCount; slot++) {
		contig = locus->contigs[slot];
		if (getNodeStatus(contig))
			continue;

		locus->contigs[slot] = NULL;
		dropped++;
		if (getUniqueness(contig))
			locus->longContigCount--;
	}

	if (dropped == 0)
		return;

	// Pass 2: copy what is left into a tighter array
	survivors = callocOrExit(locus->contigCount - dropped, Node *);
	for (slot = 0; slot < locus->contigCount; slot++) {
		contig = locus->contigs[slot];
		if (contig != NULL)
			survivors[kept++] = contig;
	}

	free(locus->contigs);
	locus->contigs = survivors;
	locus->contigCount = kept;
}
static void clipTipsVeryHardLocally() { NodeList *nodeList, *next; Node *current, *twin; boolean modified = true; //velvetLog("Clipping short tips off graph HARD\n"); while (modified) { modified = false; for (nodeList = getMarkedNodeList(); nodeList != NULL; nodeList = next) { next = nodeList->next; current = nodeList->node; if (current == NULL || getNodeStatus(current) != 1) continue; if (getUniqueness(current)) continue; //velvetLog("Checking node HARD %li %i\n", (long)getNodeID(current), simpleArcCount(current)); twin = getTwinNode(current); if( isLocalDeadEnd(current) || isLocalTwinDeadEnd(current) ){ //velvetLog("Found tip at node %li\n", (long)getNodeID(current)); handicapNode(current); modified = true; } } } }
static void trimLongReadTips() { IDnum index; Node *node; PassageMarkerI marker, next; velvetLog("Trimming read tips\n"); for (index = 1; index <= nodeCount(graph); index++) { node = getNodeInGraph(graph, index); if (getUniqueness(node)) continue; for (marker = getMarker(node); marker != NULL_IDX; marker = next) { next = getNextInNode(marker); if (!isInitial(marker) && !isTerminal(marker)) continue; if (isTerminal(marker)) marker = getTwinMarker(marker); while (!getUniqueness(getNode(marker))) { if (next != NULL_IDX && (marker == next || marker == getTwinMarker(next))) next = getNextInNode(next); if (getNextInSequence(marker) != NULL_IDX) { marker = getNextInSequence(marker); destroyPassageMarker (getPreviousInSequence (marker)); } else { destroyPassageMarker(marker); break; } } } } }
/*
 * Entry point of the read-coherency pass.
 *
 * Stores the graph, expected coverage and read sequences in the file-level
 * variables, flags unique nodes with `isUnique`, trims long-read tips, then
 * sweeps all nodes: for each unique node (and then its twin), as long as
 * uniqueNodesConnect() succeeds the node is replaced by the result of
 * bypass(). Nodes are renumbered after each sweep, and sweeps are repeated
 * until the node count stops changing.
 */
void readCoherentGraph(Graph * inGraph, boolean(*isUnique) (Node * node),
		       double coverage, ReadSet * reads)
{
	IDnum id;
	IDnum lastCount;
	Node *anchor;

	// File-level context used by the helpers
	graph = inGraph;
	listMemory = newRecycleBin(sizeof(PassageMarkerList), 100000);
	expected_coverage = coverage;
	sequences = reads->tSequences;

	velvetLog("Read coherency...\n");
	resetNodeStatus(graph);
	identifyUniqueNodes(isUnique);
	trimLongReadTips();

	lastCount = 0;
	while (lastCount != nodeCount(graph)) {
		lastCount = nodeCount(graph);

		for (id = 1; id <= nodeCount(graph); id++) {
			anchor = getNodeInGraph(graph, id);

			if (anchor == NULL)
				continue;
			if (!getUniqueness(anchor))
				continue;

			// Forward direction
			while (uniqueNodesConnect(anchor))
				anchor = bypass();

			// Reverse direction
			anchor = getTwinNode(anchor);
			while (uniqueNodesConnect(anchor))
				anchor = bypass();
		}

		renumberNodes(graph);
	}

	destroyRecycleBin(listMemory);
	destroyRecycleBin(nodeListMemory);

	velvetLog("Confronted to %li multiple hits and %li null over %li\n",
		  (long) multCounter, (long) nullCounter, (long) dbgCounter);

	velvetLog("Read coherency over!\n");
}
/*
 * Given two connections, takes the twins of their destination nodes and,
 * when neither is flagged unique, turns a direct arc between them into a
 * connection:
 *   - for an arc node1 -> node2 with no connection yet between node1 and
 *     the twin of node2, a connection (nodeID1, -nodeID2) is created;
 *   - symmetrically for an arc node2 -> node1.
 * The new connection carries the arc multiplicity as direct count, half the
 * sum of the two node lengths as distance and 1/multiplicity as variance;
 * its weight is then incremented by the multiplicity.
 */
static void computeLocalNodeToNodeMappingsFromConnections(Connection * connect,
							  Connection * connect2)
{
	Node *node1 = getTwinNode(getConnectionDestination(connect));
	Node *node2 = getTwinNode(getConnectionDestination(connect2));
	IDnum nodeID1 = getNodeID(node1);
	IDnum nodeID2 = getNodeID(node2);
	Coordinate distance =
	    getNodeLength(node1) / 2 + getNodeLength(node2) / 2;
	Arc *arc;

	if (getUniqueness(node1))
		return;
	if (getUniqueness(node2))
		return;

	// node1 -> node2
	arc = getArcBetweenNodes(node1, node2, graph);
	if (arc != NULL
	    && !getConnectionBetweenNodes(node1, getTwinNode(node2))) {
		createConnection(nodeID1, -nodeID2, getMultiplicity(arc), 0,
				 distance, 1 / (double) getMultiplicity(arc));
		incrementConnectionWeight(getConnectionBetweenNodes
					  (node1, getTwinNode(node2)),
					  getMultiplicity(arc));
	}

	// node2 -> node1
	arc = getArcBetweenNodes(node2, node1, graph);
	if (arc != NULL
	    && !getConnectionBetweenNodes(node2, getTwinNode(node1))) {
		createConnection(nodeID2, -nodeID1, getMultiplicity(arc), 0,
				 distance, 1 / (double) getMultiplicity(arc));
		incrementConnectionWeight(getConnectionBetweenNodes
					  (node2, getTwinNode(node1)),
					  getMultiplicity(arc));
	}
}
/*
 * Recursive flood fill over connections: sets the status of `node` and then
 * recurses on the destination of every connection of the node and of its
 * twin. Nodes that already have a status, or are not flagged unique, stop
 * the recursion.
 */
static void propagateComponent(Node * node)
{
	Connection *link;

	if (getNodeStatus(node))
		return;
	if (!getUniqueness(node))
		return;

	setNodeStatus(node, true);

	link = getConnection(node);
	while (link != NULL) {
		propagateComponent(getConnectionDestination(link));
		link = getNextConnection(link);
	}

	link = getConnection(getTwinNode(node));
	while (link != NULL) {
		propagateComponent(getConnectionDestination(link));
		link = getNextConnection(link);
	}
}
/*
 * Runs computeLocalNodeToNodeMappingsFromNode() on every existing unique
 * node, over signed IDs from -nodeCount(graph) to +nodeCount(graph), with
 * the arc lookup table active for the duration of the sweep.
 */
static void computeLocalNodeToNodeMappings()
{
	IDnum id = -nodeCount(graph);
	Node *candidate;

	puts("Computing local connections");
	activateArcLookupTable(graph);

	while (id <= nodeCount(graph)) {
		candidate = getNodeInGraph(graph, id);
		// IDs with no node (NULL lookup) are skipped
		if (candidate != NULL && getUniqueness(candidate))
			computeLocalNodeToNodeMappingsFromNode(candidate);
		id++;
	}

	deactivateArcLookupTable(graph);
}
/*
 * Follows the sequence of `marker` downstream and tells whether it reaches
 * `node`. Markers lying on the starting node itself are stepped over; any
 * other unique node met before `node` ends the search with false.
 */
static boolean goesToNode(PassageMarkerI marker, Node * node)
{
	Node *origin = getNode(marker);
	PassageMarkerI step = getNextInSequence(marker);

	while (step != NULL_IDX) {
		Node *visited = getNode(step);

		if (visited == node)
			return true;
		if (visited != origin && getUniqueness(visited))
			return false;

		step = getNextInSequence(step);
	}

	return false;
}
/*
 * Recursive flood fill that marks and records (recordNode) every unique node
 * reachable from `node` through connections.
 *
 * The two directions are deliberately not symmetric: connections of `node`
 * recurse on the twin of their destination, whereas connections of the twin
 * of `node` recurse on the destination itself.
 */
static void fillUpComponent(Node * node)
{
	Connection *link;

	if (getNodeStatus(node))
		return;
	if (!getUniqueness(node))
		return;

	setSingleNodeStatus(node, true);
	recordNode(node);

	link = getConnection(node);
	while (link != NULL) {
		fillUpComponent(getTwinNode(getConnectionDestination(link)));
		link = getNextConnection(link);
	}

	link = getConnection(getTwinNode(node));
	while (link != NULL) {
		fillUpComponent(getConnectionDestination(link));
		link = getNextConnection(link);
	}
}
/*
 * Mirror image of goesToNode(): follows the sequence of the twin of `marker`
 * and tells whether it reaches the twin of `node`. Markers lying on the
 * twin marker's own node are stepped over; any other unique node met first
 * ends the search with false.
 */
static boolean comesFromNode(PassageMarkerI marker, Node * node)
{
	PassageMarkerI reversed = getTwinMarker(marker);
	Node *origin = getNode(reversed);
	Node *goal = getTwinNode(node);
	PassageMarkerI step = getNextInSequence(reversed);

	while (step != NULL_IDX) {
		Node *visited = getNode(step);

		if (visited == goal)
			return true;
		if (visited != origin && getUniqueness(visited))
			return false;

		step = getNextInSequence(step);
	}

	return false;
}
/*
 * Counts the connected components formed by unique nodes: after clearing
 * all statuses, every unique node still unmarked starts a new component,
 * which propagateComponent() then marks entirely.
 */
static IDnum countConnectedComponents(Graph * graph)
{
	IDnum components = 0;
	IDnum id;
	Node *seed;

	resetNodeStatus(graph);

	for (id = 1; id <= nodeCount(graph); id++) {
		seed = getNodeInGraph(graph, id);

		if (getNodeStatus(seed) || !getUniqueness(seed))
			continue;

		components++;
		propagateComponent(seed);
	}

	return components;
}
/*
 * Builds an array of `locusCount` loci, one per connected component of
 * unique nodes.
 *
 * For each unmarked unique node a locus is filled in two stages:
 *   1. fillUpComponent() records the long contigs, which are popped into
 *      the locus and counted as its long contigs;
 *   2. extendComponent() records the secondary contigs, which are appended.
 * Long contigs are then left marked, whereas secondary contigs are unmarked
 * so that other loci can use them.
 *
 * Returns the array of loci.
 */
static Locus *extractConnectedComponents(IDnum locusCount)
{
	Locus *loci = allocateLocusArray(locusCount);
	Locus *locus;
	IDnum nextLocus = 0;
	IDnum id;
	IDnum rank;
	Node *seed;

	resetNodeStatus(graph);

	for (id = 1; id <= nodeCount(graph); id++) {
		seed = getNodeInGraph(graph, id);

		if (getNodeStatus(seed) || !getUniqueness(seed))
			continue;

		locus = getLocus(loci, nextLocus++);
		clearLocus(locus);

		// Long contigs
		fillUpComponent(seed);
		setLongContigCount(locus, countMarkedNodes());
		while (existsMarkedNode())
			addContig(locus, popNodeRecord());

		// Secondary contigs
		extendComponent(locus);
		setContigCount(locus,
			       getLongContigCount(locus) + countMarkedNodes());
		while (existsMarkedNode())
			addContig(locus, popNodeRecord());

		// Mark primary nodes so that their twins are not reused
		for (rank = 0; rank < getLongContigCount(locus); rank++)
			setNodeStatus(getContig(locus, rank), true);

		// Unmark secondary nodes so that they are available to other loci
		for (rank = getLongContigCount(locus);
		     rank < getContigCount(locus); rank++)
			setNodeStatus(getContig(locus, rank), false);
	}

	return loci;
}
/*
 * Turns one read pair into a paired connection between `node` and the node
 * named by readOccurence->nodeID.
 *
 * Nothing is created when the target is `node` or its twin, or when the
 * target is unique and has a smaller ID than `node`.
 *
 * The estimated distance starts from insertLength; each side with a
 * non-negative position shifts it by (position - offset - length/2), and
 * each side with a negative position adds length^2/16 to the variance
 * instead. Estimates more than 6 * sqrt(insertVariance) below the minimum
 * centre-to-centre distance are dropped; smaller shortfalls are clamped up
 * to that minimum.
 *
 * NOTE(review): insertVariance (double) is stored in a Coordinate local
 * before being passed on as a double; confirm this conversion is intended.
 */
static void projectFromReadPair(Node * node, ReadOccurence * readOccurence,
				Coordinate position, Coordinate offset,
				Coordinate insertLength, double insertVariance)
{
	Coordinate distance = insertLength;
	Coordinate variance = insertVariance;
	Node *target = getNodeInGraph(graph, readOccurence->nodeID);
	Coordinate nodeLength;
	Coordinate targetLength;
	Coordinate minDistance;

	if (target == getTwinNode(node) || target == node)
		return;

	if (getUniqueness(target) && getNodeID(target) < getNodeID(node))
		return;

	nodeLength = getNodeLength(node);
	targetLength = getNodeLength(target);
	minDistance = nodeLength / 2 + targetLength / 2;

	// Side of the starting node
	if (position < 0)
		variance += nodeLength * nodeLength / 16;
	else
		distance += position - offset - nodeLength / 2;

	// Side of the target node
	if (readOccurence->position < 0)
		variance += targetLength * targetLength / 16;
	else
		distance += readOccurence->position - readOccurence->offset
		    - targetLength / 2;

	// Reject strong overlaps, clamp mild ones
	if (distance - minDistance < -6 * sqrt(insertVariance))
		return;
	if (distance < minDistance)
		distance = minDistance;

	createConnection(getNodeID(node), getNodeID(target), 0, 1, distance,
			 variance);
}
/*
 * Follows the sequence of `marker` downstream and tells whether it reaches
 * `node`. The search fails as soon as the sequence comes back to the
 * starting node or its twin, or crosses another unique node first.
 */
static boolean goesToNode(PassageMarkerI marker, Node * node)
{
	Node *origin = getNode(marker);
	Node *originTwin = getTwinNode(origin);
	PassageMarkerI step = getNextInSequence(marker);

	while (step != NULL_IDX) {
		Node *visited = getNode(step);

		// Looping back onto the start takes precedence over a hit
		if (visited == origin || visited == originTwin)
			return false;
		if (visited == node)
			return true;
		if (getUniqueness(visited))
			return false;

		step = getNextInSequence(step);
	}

	return false;
}
/*
 * Calls expandLongNode() on every existing unique node and on its twin.
 * Returns true if at least one of those calls reported a change.
 */
static boolean expandLongNodes(boolean force_jumps)
{
	boolean changed = false;
	IDnum id;
	Node *candidate;

	for (id = 1; id <= nodeCount(graph); id++) {
		candidate = getNodeInGraph(graph, id);

		if (candidate == NULL || !getUniqueness(candidate))
			continue;

		// Both calls are always made, whatever the first one returns
		if (expandLongNode(candidate, force_jumps))
			changed = true;
		if (expandLongNode(getTwinNode(candidate), force_jumps))
			changed = true;
	}

	return changed;
}
/*
 * Updates the passage markers of `bypass` for its extension by `nextNode`.
 *
 * For each marker on `bypass`:
 *   - if its next marker lies on nextNode, that next marker is disconnected
 *     and destroyed;
 *   - else, if nextNode is unique and goesToNode() says the marker reaches
 *     it indirectly, every marker up to and including the one on nextNode
 *     is disconnected and destroyed;
 *   - otherwise the finish offset of the marker is increased by the length
 *     of nextNode.
 */
static void updateMembers(Node * bypass, Node * nextNode)
{
	PassageMarkerI marker, next, tmp;
	Coordinate nextLength = getNodeLength(nextNode);

	// Update marker + arc info
	for (marker = getMarker(bypass); marker != NULL_IDX; marker = tmp) {
		// Read the successor first: markers get destroyed below
		tmp = getNextInNode(marker);

		if (!isTerminal(marker)
		    && getNode(getNextInSequence(marker)) == nextNode) {
			// Marker steps right into target
			next = getNextInSequence(marker);
			disconnectNextPassageMarker(marker, graph);
			destroyPassageMarker(next);
		} else if (getUniqueness(nextNode)
			   && goesToNode(marker, nextNode)) {
			// Marker goes indirectly to target
			while (getNode(getNextInSequence(marker)) != nextNode) {
				next = getNextInSequence(marker);
				disconnectNextPassageMarker(marker, graph);
				destroyPassageMarker(next);
			}
			next = getNextInSequence(marker);
			disconnectNextPassageMarker(marker, graph);
			destroyPassageMarker(next);
		} else {
			// Marker goes somewhere else than to target, or
			// nowhere: in both cases it only gets pushed back
			incrementFinishOffset(marker, nextLength);
		}
	}
}
/*
 * Decides whether a connection leaving node IDA is kept (true) or not
 * (false).
 *
 *   - Connections to non-unique destinations are always kept.
 *   - Connections whose paired + direct counts fall below
 *     UNRELIABLE_CONNECTION_CUTOFF are rejected.
 *   - Otherwise the paired count must reach a tenth of the sum of
 *     expectedNumberOfConnections() over categories 0..CATEGORIES inclusive.
 */
static boolean testConnection(IDnum IDA, Connection * connect, IDnum ** counts)
{
	IDnum expected = 0;
	Category cat;

	// Spare unique -> undetermined node connections
	if (!getUniqueness(connect->destination))
		return true;

	// Destroy tenuous connections
	if (connect->paired_count + connect->direct_count <
	    UNRELIABLE_CONNECTION_CUTOFF)
		return false;

	cat = 0;
	while (cat <= CATEGORIES) {
		expected += expectedNumberOfConnections(IDA, connect, counts,
							cat);
		cat++;
	}

	// Remove inconsistent connections
	if (connect->paired_count >= expected / 10)
		return true;
	return false;
}
/*
 * Applies `isUniqueFunction` to every existing node of the graph, stores the
 * answer as the node's uniqueness flag, and logs how many nodes ended up
 * flagged unique.
 */
static void identifyUniqueNodes(boolean(*isUniqueFunction) (Node *))
{
	IDnum uniqueCount = 0;
	IDnum id;
	Node *current;

	velvetLog("Identifying unique nodes\n");

	for (id = 1; id <= nodeCount(graph); id++) {
		current = getNodeInGraph(graph, id);
		if (current != NULL) {
			setUniqueness(current, isUniqueFunction(current));
			if (getUniqueness(current))
				uniqueCount++;
		}
	}

	velvetLog("Done, %li unique nodes counted\n", (long) uniqueCount);
}
static Node *bypass() { Node *bypass = getNode(path); Node *next = NULL; Arc *arc; PassageMarkerI nextMarker; // Remove unwanted arcs while (getArc(bypass) != NULL) destroyArc(getArc(bypass), graph); // Update extensive variables (length + descriptors + passage markers) while (!isTerminal(path)) { nextMarker = getNextInSequence(path); next = getNode(nextMarker); while (next == bypass) { disconnectNextPassageMarker(path, graph); destroyPassageMarker(nextMarker); nextMarker = getNextInSequence(path); next = getNode(nextMarker); } if (next == NULL) return bypass; // Overall node update if (!getUniqueness(next)) { adjustShortReads(bypass, getNextInSequence(path)); appendSequence(bypass, sequences, getNextInSequence(path), graph); } else { concatenateReadStarts(bypass, next, graph); #ifndef SINGLE_COV_CAT Category cat; for (cat = 0; cat < CATEGORIES; cat++) { // Update virtual coverage incrementVirtualCoverage(bypass, cat, getVirtualCoverage(next, cat)); // Update original virtual coverage incrementOriginalVirtualCoverage(bypass, cat, getOriginalVirtualCoverage(next, cat)); } #else incrementVirtualCoverage(bypass, getVirtualCoverage(next)); #endif appendDescriptors(bypass, next); } // Members updateMembers(bypass, next); // Termination if (isTerminal(path) || getUniqueness(next)) break; } // Remove unique groupies from arrival admitGroupies(next, bypass); // Copy destination arcs for (arc = getArc(next); arc != NULL; arc = getNextArc(arc)) { if (getDestination(arc) == next) continue; else if (getDestination(arc) == getTwinNode(next)) createAnalogousArc(bypass, getTwinNode(bypass), arc, graph); else createAnalogousArc(bypass, getDestination(arc), arc, graph); } destroyNode(next, graph); return bypass; }
static boolean pushNeighbours(Node * node, Node * oppositeNode, Coordinate distance, boolean force_jumps) { Node *candidate; Coordinate oldLength = getNodeLength(node); MiniConnection *localConnect; NodeList *path, *tmp; if ((path = pathIsClear(node, oppositeNode, distance))) { while (path) { candidate = path->node; tmp = path->next; deallocateNodeList(path); path = tmp; /////////////////////////////////////// // Stepping forward to destination // /////////////////////////////////////// if (getUniqueness(candidate)) { concatenateReadStarts(node, candidate, graph); concatenateLongReads(node, candidate, graph); absorbExtension(node, candidate); // Scaffold changes recenterNode(node, oldLength); recenterLocalScaffold(node, oldLength); absorbExtensionInScaffold(node, candidate); // Read coverage #ifndef SINGLE_COV_CAT Category cat; for (cat = 0; cat < CATEGORIES; cat++) { incrementVirtualCoverage(node, cat, getVirtualCoverage(candidate, cat)); incrementOriginalVirtualCoverage(node, cat, getOriginalVirtualCoverage(candidate, cat)); } #else incrementVirtualCoverage(node, getVirtualCoverage(candidate)); #endif if (getNodeStatus(candidate)) { localConnect = &localScaffold[getNodeID (candidate) + nodeCount (graph)]; if (localConnect->frontReference) { destroyConnection (localConnect-> frontReference, getNodeID(node)); localConnect-> frontReference = NULL; } if (localConnect->backReference) { destroyConnection (localConnect-> backReference, -getNodeID(node)); localConnect-> backReference = NULL; } unmarkNode(candidate, localConnect); } if (getNodeStatus(getTwinNode(candidate))) { localConnect = &localScaffold[-getNodeID (candidate) + nodeCount (graph)]; if (localConnect->frontReference) { destroyConnection (localConnect-> frontReference, getNodeID(node)); localConnect-> frontReference = NULL; } if (localConnect->backReference) { destroyConnection (localConnect-> backReference, -getNodeID(node)); localConnect-> backReference = NULL; } unmarkNode(getTwinNode(candidate), 
localConnect); } destroyNode(candidate, graph); return true; } else { adjustShortReads(node, candidate); adjustLongReads(node, getNodeLength(candidate)); absorbExtension(node, candidate); } } } if (force_jumps && oppositeNode && abs_ID(getNodeID(oppositeNode)) < abs_ID(getNodeID(node))) { distance -= getNodeLength(node) / 2; distance -= getNodeLength(oppositeNode) / 2; if (distance > 10) { adjustShortReadsByLength(node, distance); adjustLongReads(node, distance); appendGap(node, distance, graph); } else { adjustShortReadsByLength(node, 10); adjustLongReads(node, 10); appendGap(node, 10, graph); } concatenateReadStarts(node, oppositeNode, graph); concatenateLongReads(node, oppositeNode, graph); absorbExtension(node, oppositeNode); // Scaffold changes recenterNode(node, oldLength); recenterLocalScaffold(node, oldLength); absorbExtensionInScaffold(node, oppositeNode); // Read coverage #ifndef SINGLE_COV_CAT Category cat; for (cat = 0; cat < CATEGORIES; cat++) incrementVirtualCoverage(node, cat, getVirtualCoverage(oppositeNode, cat)); #else incrementVirtualCoverage(node, getVirtualCoverage(oppositeNode)); #endif if (getNodeStatus(oppositeNode)) { localConnect = &localScaffold[getNodeID(oppositeNode) + nodeCount(graph)]; if (localConnect->frontReference) { destroyConnection(localConnect-> frontReference, getNodeID(node)); localConnect->frontReference = NULL; } if (localConnect->backReference) { destroyConnection(localConnect-> backReference, -getNodeID(node)); localConnect->backReference = NULL; } unmarkNode(oppositeNode, localConnect); } if (getNodeStatus(getTwinNode(oppositeNode))) { localConnect = &localScaffold[-getNodeID(oppositeNode) + nodeCount(graph)]; if (localConnect->frontReference) { destroyConnection(localConnect-> frontReference, getNodeID(node)); localConnect->frontReference = NULL; } if (localConnect->backReference) { destroyConnection(localConnect-> backReference, -getNodeID(node)); localConnect->backReference = NULL; } 
unmarkNode(getTwinNode(oppositeNode), localConnect); } destroyNode(oppositeNode, graph); } return false; }
/* Frees every element of a node list built by pathIsClear(). */
static void discardCandidatePath(NodeList * list)
{
	NodeList *rest;

	while (list) {
		rest = list->next;
		deallocateNodeList(list);
		list = rest;
	}
}

/*
 * Greedily walks the arcs out of `node` until a unique node is reached and
 * returns the list of nodes crossed (the unique node last), or NULL when
 * the walk is abandoned.
 *
 * At each step the next node is chosen among the arc destinations other
 * than `node` and its twin:
 *   - first among destinations with a strictly positive status, preferring
 *     the lowest status and, on ties, the smallest extension distance
 *     (local scaffold distance minus half the node length);
 *   - failing that, among destinations with a non-zero status, preferring
 *     the highest status and, on ties, the largest extension distance.
 * The status of the chosen node is pushed one step away from zero.
 *
 * The walk is abandoned when no destination qualifies, when the loop
 * detection fires, when a node status exceeds 100 in absolute value, or
 * when a unique node other than oppositeNode lies further than `distance`.
 *
 * NOTE(review): `counter` is never incremented, so the
 * `counter > nodeCount(graph)` test cannot fire for a non-negative node
 * count; kept as is, confirm whether a step limit was intended.
 */
static NodeList *pathIsClear(Node * node, Node * oppositeNode,
			     Coordinate distance)
{
	Arc *arc;
	Node *candidate, *dest, *current;
	Coordinate extension_distance = 0;
	Coordinate reach;
	boolean maxRepeat = 1;
	Node *repeatEntrance = NULL;
	IDnum counter = 0;
	NodeList *path = NULL;
	NodeList *tail = path;
	NodeList *link;

	setSingleNodeStatus(node, 2);

	current = node;
	while (true) {
		//////////////////////////////////
		//  Selecting destination       //
		//////////////////////////////////
		candidate = NULL;

		// First round for priority nodes
		for (arc = getArc(current); arc != NULL;
		     arc = getNextArc(arc)) {
			dest = getDestination(arc);

			if (dest == node || dest == getTwinNode(node))
				continue;

			if (getNodeStatus(dest) <= 0)
				continue;

			reach =
			    localScaffold[getNodeID(dest) +
					  nodeCount(graph)].distance -
			    getNodeLength(dest) / 2;

			if (candidate == NULL
			    || getNodeStatus(candidate) > getNodeStatus(dest)
			    || (getNodeStatus(candidate) == getNodeStatus(dest)
				&& extension_distance > reach)) {
				extension_distance = reach;
				candidate = dest;
			}
		}

		// In case of failure
		if (candidate == NULL) {
			for (arc = getArc(current); arc != NULL;
			     arc = getNextArc(arc)) {
				dest = getDestination(arc);

				if (getNodeStatus(dest) == 0)
					continue;

				if (dest == node || dest == getTwinNode(node))
					continue;

				reach =
				    localScaffold[getNodeID(dest) +
						  nodeCount(graph)].distance -
				    getNodeLength(dest) / 2;

				if (candidate == NULL
				    || getNodeStatus(candidate) <
				    getNodeStatus(dest)
				    || (getNodeStatus(candidate) ==
					getNodeStatus(dest)
					&& extension_distance < reach)) {
					extension_distance = reach;
					candidate = dest;
				}
			}
		}

		if (candidate == NULL) {
			discardCandidatePath(path);
			return NULL;
		}

		// Loop detection
		if (candidate == repeatEntrance
		    && abs_bool(getNodeStatus(candidate)) == maxRepeat + 1) {
			discardCandidatePath(path);
			return NULL;
		} else if (abs_bool(getNodeStatus(candidate)) > maxRepeat) {
			maxRepeat = abs_bool(getNodeStatus(candidate));
			repeatEntrance = candidate;
		} else if (abs_bool(getNodeStatus(candidate)) == 1) {
			maxRepeat = 1;
			repeatEntrance = NULL;
		}

		// Record the visit: status moves one step away from zero
		if (getNodeStatus(candidate) > 0)
			setSingleNodeStatus(candidate,
					    getNodeStatus(candidate) + 1);
		else
			setSingleNodeStatus(candidate,
					    getNodeStatus(candidate) - 1);

		if (abs_bool(getNodeStatus(candidate)) > 100
		    || counter > nodeCount(graph)) {
			discardCandidatePath(path);
			return NULL;
		}

		// Missassembly detection
		if (getUniqueness(candidate) && oppositeNode
		    && candidate != oppositeNode
		    && extension_distance > distance) {
			discardCandidatePath(path);
			return NULL;
		}

		// Append the chosen node to the path
		link = allocateNodeList();
		link->node = candidate;
		link->next = NULL;
		if (path == NULL)
			path = link;
		else
			tail->next = link;
		tail = link;

		if (getUniqueness(candidate))
			return path;

		current = candidate;
	}
}
/*
 * Turns one long read seen on both `node` and the node named by
 * -readOccurence->nodeID into direct connection(s) between the two.
 *
 * Nothing is created when the target is `node` or its twin, or when the
 * target is unique and has a smaller ID than `node`.
 *
 * A signed distance estimate is accumulated from whichever of the positions
 * and offsets are known (non-negative); each unknown one inflates the
 * variance instead. The orientation of the connection is then chosen:
 *   1. both offsets known: their relative order gives the direction, and the
 *      distance is clamped up to the minimum centre-to-centre distance
 *      (overlaps of more than 10 are dropped);
 *   2. only this node's offset and position known: direction is deduced
 *      from where the read falls relative to this node, both directions
 *      being created when that is inconclusive;
 *   3. only the target's offset and position known: same test relative to
 *      the target;
 *   4. otherwise both directions are created.
 *
 * Case 3 is a proper if / else-if / else chain like case 2. Its second test
 * used to be a free-standing `if`, so a successful first test fell into the
 * trailing else and created both directions again.
 */
static void projectFromSingleRead(Node * node, ReadOccurence * readOccurence,
				  Coordinate position, Coordinate offset,
				  Coordinate length)
{
	Coordinate distance = 0;
	Node *target = getNodeInGraph(graph, -readOccurence->nodeID);
	double variance = 1;
	Coordinate minDistance;
	Coordinate shift;

	if (target == getTwinNode(node) || target == node)
		return;

	if (getUniqueness(target) && getNodeID(target) < getNodeID(node))
		return;

	// Smallest possible distance between the two node centres
	minDistance = getNodeLength(node) / 2 + getNodeLength(target) / 2;

	if (position < 0)
		variance += getNodeLength(node) * getNodeLength(node) / 16;
	else
		distance += position - getNodeLength(node) / 2;

	if (readOccurence->position < 0)
		variance +=
		    getNodeLength(target) * getNodeLength(target) / 16;
	else
		distance +=
		    -readOccurence->position + getNodeLength(target) / 2;

	if (readOccurence->offset < 0 || offset < 0)
		variance += length * length / 16;
	else
		distance += readOccurence->offset - offset;

	// Relative ordering
	if (offset > 0 && readOccurence->offset > 0) {
		if (offset < readOccurence->offset) {
			if (distance - minDistance >= -10)
				createConnection(getNodeID(node),
						 getNodeID(target), 1, 0,
						 distance < minDistance ?
						 minDistance : distance,
						 variance);
		} else if (offset > readOccurence->offset) {
			if (-distance - minDistance >= -10)
				createConnection(-getNodeID(node),
						 -getNodeID(target), 1, 0,
						 -distance < minDistance ?
						 minDistance : -distance,
						 variance);
		}
	} else if (offset > 0 && position > 0) {
		shift = distance - offset;

		if (shift > -getNodeLength(node) / 2
		    && shift + length > getNodeLength(node) / 2)
			createConnection(getNodeID(node), getNodeID(target),
					 1, 0, minDistance, variance);
		else if (shift < -getNodeLength(node) / 2
			 && shift + length < getNodeLength(node) / 2)
			createConnection(-getNodeID(node), -getNodeID(target),
					 1, 0, minDistance, variance);
		else {
			createConnection(getNodeID(node), getNodeID(target),
					 1, 0, minDistance, variance);
			createConnection(-getNodeID(node), -getNodeID(target),
					 1, 0, minDistance, variance);
		}
	} else if (readOccurence->offset > 0 && readOccurence->position > 0) {
		shift = -distance - readOccurence->offset;

		if (shift > -getNodeLength(target) / 2
		    && shift + length > getNodeLength(target) / 2)
			createConnection(-getNodeID(node), -getNodeID(target),
					 1, 0, minDistance, variance);
		else if (shift < -getNodeLength(target) / 2
			 && shift + length < getNodeLength(target) / 2)
			createConnection(getNodeID(node), getNodeID(target),
					 1, 0, minDistance, variance);
		else {
			createConnection(getNodeID(node), getNodeID(target),
					 1, 0, minDistance, variance);
			createConnection(-getNodeID(node), -getNodeID(target),
					 1, 0, minDistance, variance);
		}
	} else {
		createConnection(getNodeID(node), getNodeID(target), 1, 0,
				 minDistance, variance);
		createConnection(-getNodeID(node), -getNodeID(target), 1, 0,
				 minDistance, variance);
	}
}
static void projectFromReadPair(Node * node, ReadOccurence * readOccurence, Coordinate position, Coordinate offset, Coordinate insertLength, double insertVariance, boolean weight) { Coordinate distance = insertLength; Coordinate variance = insertVariance; Node *target = getNodeInGraph(graph, readOccurence->nodeID); Connection *connect; double score; // Filter for useless reads: if (readOccurence->position == -1 && readOccurence->offset == -1) return; if (target == getTwinNode(node) || target == node) return; if (getUniqueness(target) && getNodeID(target) < getNodeID(node)) return; if (weight) { if (position > 0 && readOccurence->position > 0 && (connect = getConnectionBetweenNodes(node, target))) { distance = getConnectionDistance(connect); distance -= position - offset - getNodeLength(node) / 2; distance -= readOccurence->position - readOccurence->offset - getNodeLength(target) / 2; score = K * exp((insertLength - distance) * (distance - insertLength) / (2 * insertVariance)); incrementConnectionWeight(connect, score); } return; } if (position < 0) { variance += getNodeLength(node) * getNodeLength(node) / 16; // distance += 0; } else { // variance += 0; distance += position - offset - getNodeLength(node) / 2; } if (readOccurence->position < 0) { variance += getNodeLength(target) * getNodeLength(target) / 16; //distance += 0; } else { // variance += 0; distance += readOccurence->position - readOccurence->offset - getNodeLength(target) / 2; } if (distance - getNodeLength(node) / 2 - getNodeLength(target) / 2 < -6 * sqrt(insertVariance)) return; createConnection(getNodeID(node), getNodeID(target), 0, 1, distance, variance); }
// Replaces two consecutive nodes into a single equivalent node // The extra memory is freed void concatenateStringOfNodes(Node * nodeA, Graph * graph) { Node *twinA = getTwinNode(nodeA); Node * nodeB = nodeA; Node * twinB; Node *currentNode, *nextNode; Coordinate totalLength = 0; PassageMarkerI marker, tmpMarker; Arc *arc; Category cat; while (simpleArcCount(nodeB) == 1 && simpleArcCount(getTwinNode (getDestination(getArc(nodeB)))) == 1 && getDestination(getArc(nodeB)) != getTwinNode(nodeB) && getDestination(getArc(nodeB)) != nodeA) { totalLength += getNodeLength(nodeB); nodeB = getDestination(getArc(nodeB)); } twinB = getTwinNode(nodeB); totalLength += getNodeLength(nodeB); reallocateNodeDescriptor(nodeA, totalLength); currentNode = nodeA; while (currentNode != nodeB) { currentNode = getDestination(getArc(currentNode)); // Passage marker management in node A: for (marker = getMarker(nodeA); marker != NULL_IDX; marker = getNextInNode(marker)) if (getNode(getNextInSequence(marker)) != currentNode) incrementFinishOffset(marker, getNodeLength(currentNode)); // Swapping new born passageMarkers from B to A for (marker = getMarker(currentNode); marker != NULL_IDX; marker = tmpMarker) { tmpMarker = getNextInNode(marker); if (isInitial(marker) || getNode(getPreviousInSequence(marker)) != nodeA) { extractPassageMarker(marker); transposePassageMarker(marker, nodeA); incrementFinishOffset(getTwinMarker(marker), getNodeLength(nodeA)); } else disconnectNextPassageMarker(getPreviousInSequence (marker), graph); } // Read starts concatenateReadStarts(nodeA, currentNode, graph); // Gaps appendNodeGaps(nodeA, currentNode, graph); // Update uniqueness: setUniqueness(nodeA, getUniqueness(nodeA) || getUniqueness(currentNode)); // Update virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementVirtualCoverage(nodeA, cat, getVirtualCoverage(currentNode, cat)); // Update original virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementOriginalVirtualCoverage(nodeA, cat, 
getOriginalVirtualCoverage (currentNode, cat)); // Descriptor management (node) directlyAppendDescriptors(nodeA, currentNode, totalLength); } // Correct arcs for (arc = getArc(nodeB); arc != NULL; arc = getNextArc(arc)) { if (getDestination(arc) != twinB) createAnalogousArc(nodeA, getDestination(arc), arc, graph); else createAnalogousArc(nodeA, twinA, arc, graph); } // Freeing gobbled nodes currentNode = getTwinNode(nodeB); while (currentNode != getTwinNode(nodeA)) { arc = getArc(currentNode); nextNode = getDestination(arc); destroyNode(currentNode, graph); currentNode = nextNode; } }
/*
 * Projects the connections of the destination of `connect` (the reference
 * node) into the local scaffold, as second-hand distance estimates.
 *
 * Nothing is done unless the reference node is unique. For every connection
 * of the reference node other than the twin of `connect`:
 *   - direction true:  target = its destination,
 *                      distance = base distance - its distance;
 *   - direction false: target = twin of its destination,
 *                      distance = its distance - base distance;
 *   - variance = sum of the two variances.
 * Estimates below min_distance are discarded. The others update the local
 * scaffold entry of the target: readjustMiniConnection() if the target
 * already has a status, resetMiniConnection() otherwise.
 */
static void integrateDerivativeDistances(Connection * connect,
					 Coordinate min_distance,
					 boolean direction)
{
	Node *reference = getConnectionDestination(connect);
	Node *destination;
	Coordinate baseDistance;
	Coordinate distance;
	double baseVariance;
	double variance;
	Connection *derived;
	MiniConnection *localConnect;

	if (!getUniqueness(reference))
		return;

	baseDistance = getConnectionDistance(connect);
	baseVariance = getConnectionVariance(connect);

	derived = getConnection(reference);
	for (; derived != NULL; derived = getNextConnection(derived)) {
		// Avoid null derivative
		if (derived == getTwinConnection(connect))
			continue;

		// Beware of directionality, for both target and distance
		destination = getConnectionDestination(derived);
		if (direction) {
			distance =
			    baseDistance - getConnectionDistance(derived);
		} else {
			destination = getTwinNode(destination);
			distance =
			    getConnectionDistance(derived) - baseDistance;
		}
		variance = getConnectionVariance(derived) + baseVariance;

		// Avoid over-projection
		if (distance < min_distance)
			continue;

		localConnect =
		    &localScaffold[getNodeID(destination) + nodeCount(graph)];

		if (getNodeStatus(destination))
			readjustMiniConnection(destination, localConnect,
					       distance, min_distance,
					       variance, NULL, NULL);
		else
			resetMiniConnection(destination, localConnect,
					    distance, variance, NULL, NULL,
					    true);
	}
}
// Replaces two consecutive nodes into a single equivalent node // The extra memory is freed void concatenateNodes(Node * nodeA, Node * nodeB, Graph * graph) { PassageMarkerI marker, tmpMarker; Node *twinA = getTwinNode(nodeA); Node *twinB = getTwinNode(nodeB); Arc *arc; Category cat; // Arc management: // Freeing useless arcs while (getArc(nodeA) != NULL) destroyArc(getArc(nodeA), graph); // Correct arcs for (arc = getArc(nodeB); arc != NULL; arc = getNextArc(arc)) { if (getDestination(arc) != twinB) createAnalogousArc(nodeA, getDestination(arc), arc, graph); else createAnalogousArc(nodeA, twinA, arc, graph); } // Passage marker management in node A: for (marker = getMarker(nodeA); marker != NULL_IDX; marker = getNextInNode(marker)) if (isTerminal(marker)) incrementFinishOffset(marker, getNodeLength(nodeB)); // Swapping new born passageMarkers from B to A for (marker = getMarker(nodeB); marker != NULL_IDX; marker = tmpMarker) { tmpMarker = getNextInNode(marker); if (isInitial(marker) || getNode(getPreviousInSequence(marker)) != nodeA) { extractPassageMarker(marker); transposePassageMarker(marker, nodeA); incrementFinishOffset(getTwinMarker(marker), getNodeLength(nodeA)); } else disconnectNextPassageMarker(getPreviousInSequence (marker), graph); } // Read starts concatenateReadStarts(nodeA, nodeB, graph); // Gaps appendNodeGaps(nodeA, nodeB, graph); // Descriptor management (node) appendDescriptors(nodeA, nodeB); // Update uniqueness: setUniqueness(nodeA, getUniqueness(nodeA) || getUniqueness(nodeB)); // Update virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementVirtualCoverage(nodeA, cat, getVirtualCoverage(nodeB, cat)); // Update original virtual coverage for (cat = 0; cat < CATEGORIES; cat++) incrementOriginalVirtualCoverage(nodeA, cat, getOriginalVirtualCoverage (nodeB, cat)); // Freeing gobbled node destroyNode(nodeB, graph); }
static boolean uniqueNodesConnect(Node * startingNode) { Node *destination = NULL; PassageMarkerI startMarker, currentMarker; RBConnection *newList; RBConnection *list = NULL; boolean multipleHits = false; if (arcCount(startingNode) == 0) return false; if (getMarker(startingNode) == NULL_IDX) return false; dbgCounter++; // Checking for multiple destinations for (startMarker = getMarker(startingNode); startMarker != NULL_IDX; startMarker = getNextInNode(startMarker)) { if (getFinishOffset(startMarker) > 2 * getWordLength(graph)) continue; for (currentMarker = getNextInSequence(startMarker); currentMarker != NULL_IDX; currentMarker = getNextInSequence(currentMarker)) { if (!getUniqueness(getNode(currentMarker))) { continue; } else if (getNodeStatus(getNode(currentMarker))) { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; for (newList = list; newList != NULL; newList = newList->next) { if (newList->node == getNode(currentMarker)) { newList->multiplicity++; break; } } if (newList == NULL) abort(); break; } else { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; setSingleNodeStatus(getNode(currentMarker), true); newList = allocateRBConnection(); newList->node = getNode(currentMarker); newList->multiplicity = 1; newList->marker = startMarker; newList->next = list; list = newList; break; } } } while (list != NULL) { newList = list; list = newList->next; setSingleNodeStatus(newList->node, false); if (newList->multiplicity >= MULTIPLICITY_CUTOFF) { if (destination == NULL) { destination = newList->node; path = newList->marker; } else if (destination != newList->node) multipleHits = true; } deallocateRBConnection(newList); } if (multipleHits) { multCounter++; setUniqueness(startingNode, false); return false; } if (destination == NULL || destination == startingNode || destination == getTwinNode(startingNode)) { nullCounter++; return false; } // Check for reciprocity for (startMarker = getMarker(getTwinNode(destination)); startMarker 
!= NULL_IDX; startMarker = getNextInNode(startMarker)) { if (getFinishOffset(startMarker) > 2 * getWordLength(graph)) continue; for (currentMarker = getNextInSequence(startMarker); currentMarker != NULL_IDX; currentMarker = getNextInSequence(currentMarker)) { if (!getUniqueness(getNode(currentMarker))) { continue; } else if (getNodeStatus(getNode(currentMarker))) { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; for (newList = list; newList != NULL; newList = newList->next) { if (newList->node == getNode(currentMarker)) { newList->multiplicity++; break; } } if (newList == NULL) abort(); break; } else { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; setSingleNodeStatus(getNode(currentMarker), true); newList = allocateRBConnection(); newList->node = getNode(currentMarker); newList->multiplicity = 1; newList->next = list; list = newList; break; } } } while (list != NULL) { newList = list; list = newList->next; setSingleNodeStatus(newList->node, false); if (newList->multiplicity >= MULTIPLICITY_CUTOFF && newList->node != getTwinNode(startingNode)) multipleHits = true; deallocateRBConnection(newList); } if (multipleHits) { multCounter++; setUniqueness(destination, false); return false; } // Aligning long reads to each other: // TODO // Merge pairwise alignments and produce consensus // TODO return true; }