/*
 * Destroy short, low-coverage edges that have arcs on both sides.
 *
 * An edge i (1..num_ed) is left alone when any of the following holds:
 *   - it is already flagged as deleted,
 *   - its coverage (cvg) is 0 or exceeds covCutoff * 10,
 *   - its length is >= lenCutoff or is 0,
 *   - EdSameAsTwin() reports true for it,
 *   - arcCount() reports no arc for the edge or for its twin.
 * Every remaining edge is passed to destroyEdge().
 *
 * After the sweep the number of destroyed edges is printed and
 * removeDeadArcs(), linearConcatenate() and compactEdgeArray() are run,
 * in that order.
 *
 * lenCutoff: edges at least this long are never destroyed.
 * covCutoff: coverage threshold; compared against cvg after scaling by 10.
 */
void removeLowCovEdges (int lenCutoff, unsigned short covCutoff)
{
	unsigned int edgeNo;
	unsigned int twinNo;
	unsigned int outgoing, incoming;
	int destroyed = 0;

	for (edgeNo = 1; edgeNo <= num_ed; edgeNo++)
	{
		/* Guards are evaluated in the same order as a single || chain would. */
		if (edge_array[edgeNo].deleted)
		{
			continue;
		}

		if (edge_array[edgeNo].cvg == 0)
		{
			continue;
		}

		if (edge_array[edgeNo].cvg > covCutoff * 10)
		{
			continue;
		}

		if (edge_array[edgeNo].length >= lenCutoff)
		{
			continue;
		}

		if (EdSameAsTwin (edgeNo))
		{
			continue;
		}

		if (edge_array[edgeNo].length == 0)
		{
			continue;
		}

		twinNo = getTwinEdge (edgeNo);

		/* Only the arc counts matter here; the returned arcs are not used. */
		arcCount (edgeNo, &outgoing);
		arcCount (twinNo, &incoming);

		/* "Inner" edge: at least one arc on the edge and one on its twin. */
		if (incoming >= 1 && outgoing >= 1)
		{
			destroyEdge (edgeNo);
			destroyed++;
		}
	}

	printf ("Remove low coverage(%d): %d inner edges destroyed\n", covCutoff, destroyed);
	removeDeadArcs ();
	linearConcatenate ();
	compactEdgeArray ();
}
void removeWeakEdges (int lenCutoff, unsigned int multiCutoff) { unsigned int bal_ed; unsigned int arcRight_n, arcLeft_n; ARC *arcLeft, *arcRight; unsigned int i; int counter = 0; for (i = 1; i <= num_ed; i++) { if (edge_array[i].deleted || edge_array[i].length == 0 || edge_array[i].length > lenCutoff || EdSameAsTwin (i)) { continue; } bal_ed = getTwinEdge (i); arcRight = arcCount (i, &arcRight_n); if (arcRight_n > 1 || !arcRight || arcRight->multiplicity > multiCutoff) { continue; } arcLeft = arcCount (bal_ed, &arcLeft_n); if (arcLeft_n > 1 || !arcLeft || arcLeft->multiplicity > multiCutoff) { continue; } destroyEdge (i); counter++; } printf ("%d weak inner edges destroyed\n", counter); removeDeadArcs (); /* linearConcatenate(); compactEdgeArray(); */ }
boolean isUnreliableTip_strict (unsigned int edgeid, int cutLen) { unsigned int arcRight_n, arcLeft_n; unsigned int bal_ed; unsigned int currentEd = edgeid; int length = 0; unsigned int mult = 0; ARC *arc, *activeArc = NULL, *tempArc; if (edgeid == 0) { return 0; } bal_ed = getTwinEdge (edgeid); if (bal_ed == edgeid) { return 0; } arcCount (bal_ed, &arcLeft_n); if (arcLeft_n > 0) { return 0; } while (currentEd) { arcCount (bal_ed, &arcLeft_n); tempArc = arcCount (currentEd, &arcRight_n); if (arcLeft_n > 1 || arcRight_n > 1) { if (arcLeft_n == 0 || length == 0) { return 0; } else { break; } } length += edge_array[currentEd].length; if (length >= cutLen) { return 0; } if (tempArc) { activeArc = tempArc; currentEd = activeArc->to_ed; bal_ed = getTwinEdge (currentEd); } else { currentEd = 0; } } if (currentEd == 0) { caseA++; return 1; } if (!activeArc) { printf ("no activeArc while checking edge %d\n", edgeid); } if (activeArc->multiplicity == 1) { caseB++; return 1; } for (arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next) if (arc->multiplicity > mult) { mult = arc->multiplicity; } if (mult > activeArc->multiplicity) { caseC++; } return mult > activeArc->multiplicity; }
static boolean uniqueNodesConnect(Node * startingNode) { Node *destination = NULL; PassageMarkerI startMarker, currentMarker; RBConnection *newList; RBConnection *list = NULL; boolean multipleHits = false; if (arcCount(startingNode) == 0) return false; if (getMarker(startingNode) == NULL_IDX) return false; dbgCounter++; // Checking for multiple destinations for (startMarker = getMarker(startingNode); startMarker != NULL_IDX; startMarker = getNextInNode(startMarker)) { if (getFinishOffset(startMarker) > 2 * getWordLength(graph)) continue; for (currentMarker = getNextInSequence(startMarker); currentMarker != NULL_IDX; currentMarker = getNextInSequence(currentMarker)) { if (!getUniqueness(getNode(currentMarker))) { continue; } else if (getNodeStatus(getNode(currentMarker))) { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; for (newList = list; newList != NULL; newList = newList->next) { if (newList->node == getNode(currentMarker)) { newList->multiplicity++; break; } } if (newList == NULL) abort(); break; } else { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; setSingleNodeStatus(getNode(currentMarker), true); newList = allocateRBConnection(); newList->node = getNode(currentMarker); newList->multiplicity = 1; newList->marker = startMarker; newList->next = list; list = newList; break; } } } while (list != NULL) { newList = list; list = newList->next; setSingleNodeStatus(newList->node, false); if (newList->multiplicity >= MULTIPLICITY_CUTOFF) { if (destination == NULL) { destination = newList->node; path = newList->marker; } else if (destination != newList->node) multipleHits = true; } deallocateRBConnection(newList); } if (multipleHits) { multCounter++; setUniqueness(startingNode, false); return false; } if (destination == NULL || destination == startingNode || destination == getTwinNode(startingNode)) { nullCounter++; return false; } // Check for reciprocity for (startMarker = getMarker(getTwinNode(destination)); startMarker 
!= NULL_IDX; startMarker = getNextInNode(startMarker)) { if (getFinishOffset(startMarker) > 2 * getWordLength(graph)) continue; for (currentMarker = getNextInSequence(startMarker); currentMarker != NULL_IDX; currentMarker = getNextInSequence(currentMarker)) { if (!getUniqueness(getNode(currentMarker))) { continue; } else if (getNodeStatus(getNode(currentMarker))) { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; for (newList = list; newList != NULL; newList = newList->next) { if (newList->node == getNode(currentMarker)) { newList->multiplicity++; break; } } if (newList == NULL) abort(); break; } else { if (getStartOffset(currentMarker) > 2 * getWordLength(graph)) break; setSingleNodeStatus(getNode(currentMarker), true); newList = allocateRBConnection(); newList->node = getNode(currentMarker); newList->multiplicity = 1; newList->next = list; list = newList; break; } } } while (list != NULL) { newList = list; list = newList->next; setSingleNodeStatus(newList->node, false); if (newList->multiplicity >= MULTIPLICITY_CUTOFF && newList->node != getTwinNode(startingNode)) multipleHits = true; deallocateRBConnection(newList); } if (multipleHits) { multCounter++; setUniqueness(destination, false); return false; } // Aligning long reads to each other: // TODO // Merge pairwise alignments and produce consensus // TODO return true; }