/*
 * Destroy short edges whose coverage is at or below a threshold and which
 * have arcs on both sides, then tidy the graph.
 *
 * lenCutoff: edges with length >= lenCutoff are never touched.
 * covCutoff: edges with cvg > covCutoff * 10 are never touched
 *            (presumably cvg is stored scaled by 10 -- TODO confirm).
 *
 * Edges that are flagged deleted, have cvg == 0, have length == 0, or for
 * which EdSameAsTwin() is true are skipped as well.  A remaining edge is
 * destroyed only when arcCount() reports at least one arc for the edge
 * itself and at least one for its twin.
 *
 * After the sweep a summary line is printed and removeDeadArcs(),
 * linearConcatenate() and compactEdgeArray() are run, in that order.
 */
void removeLowCovEdges (int lenCutoff, unsigned short covCutoff)
{
    unsigned int twin;
    unsigned int nOwnArcs, nTwinArcs;
    unsigned int ed;
    int destroyed = 0;

    for (ed = 1; ed <= num_ed; ed++)
    {
        /* The skip tests are evaluated in the same order as a single
           short-circuited condition would evaluate them. */
        if (edge_array[ed].deleted || edge_array[ed].cvg == 0)
        {
            continue;
        }

        if (edge_array[ed].cvg > covCutoff * 10)
        {
            continue;
        }

        if (edge_array[ed].length >= lenCutoff || EdSameAsTwin (ed) || edge_array[ed].length == 0)
        {
            continue;
        }

        twin = getTwinEdge (ed);
        /* Only the counts matter here; the returned arc pointers are not needed. */
        arcCount (ed, &nOwnArcs);
        arcCount (twin, &nTwinArcs);

        /* Destroy only edges that have arcs on both the edge and its twin. */
        if (nTwinArcs >= 1 && nOwnArcs >= 1)
        {
            destroyEdge (ed);
            destroyed++;
        }
    }

    printf ("Remove low coverage(%d): %d inner edges destroyed\n", covCutoff, destroyed);
    removeDeadArcs ();
    linearConcatenate ();
    compactEdgeArray ();
}
/*
 * Destroy short edges that are only weakly attached on both sides.
 *
 * lenCutoff:   edges with length > lenCutoff are never touched.
 * multiCutoff: upper bound on the multiplicity of the arc returned by
 *              arcCount() for an edge to count as weakly attached.
 *
 * Edges that are flagged deleted, have length == 0, or for which
 * EdSameAsTwin() is true are skipped.  A remaining edge is destroyed when,
 * for both the edge and its twin, arcCount() reports at most one arc,
 * returns a non-NULL arc, and that arc's multiplicity is <= multiCutoff.
 *
 * After the sweep a summary line is printed and removeDeadArcs() is run.
 */
void removeWeakEdges (int lenCutoff, unsigned int multiCutoff)
{
    unsigned int twin;
    unsigned int nOwnArcs, nTwinArcs;
    ARC *ownArc, *twinArc;
    unsigned int ed;
    int destroyed = 0;

    for (ed = 1; ed <= num_ed; ed++)
    {
        if (edge_array[ed].deleted || edge_array[ed].length == 0 || edge_array[ed].length > lenCutoff || EdSameAsTwin (ed))
        {
            continue;
        }

        twin = getTwinEdge (ed);
        ownArc = arcCount (ed, &nOwnArcs);

        /* The twin side is only queried once the edge's own side qualifies. */
        if (nOwnArcs <= 1 && ownArc && ownArc->multiplicity <= multiCutoff)
        {
            twinArc = arcCount (twin, &nTwinArcs);

            if (nTwinArcs <= 1 && twinArc && twinArc->multiplicity <= multiCutoff)
            {
                destroyEdge (ed);
                destroyed++;
            }
        }
    }

    printf ("%d weak inner edges destroyed\n", destroyed);
    removeDeadArcs ();
    /* NOTE: linearConcatenate() and compactEdgeArray() are not run here;
       those calls were left disabled in this routine. */
}
/*
 * Decide whether the chain of edges starting at edgeid is an unreliable tip.
 *
 * edgeid: starting edge; 0 is rejected.
 * cutLen: the chain is rejected once its accumulated length reaches cutLen.
 *
 * Returns 0 immediately when edgeid is 0, when the edge is its own twin, or
 * when arcCount() reports any arc for the twin of edgeid.
 *
 * Otherwise the chain is walked by repeatedly following the arc returned by
 * arcCount() for the current edge (via its to_ed field), summing the edge
 * lengths, until one of the following happens:
 *   - the accumulated length reaches cutLen               -> return 0
 *   - a branch (more than one arc on either side) is met
 *     before any length was accumulated, or with no arc
 *     on the twin side                                    -> return 0
 *   - the walk runs out of edges (no arc, or to_ed == 0)   -> caseA++, return 1
 *   - a branch is met after at least one step             -> stop walking
 *
 * When the walk stopped at a branch, the last arc followed decides:
 *   - its multiplicity is 1                               -> caseB++, return 1
 *   - some arc of the branch edge's twin has a larger
 *     multiplicity than it                                -> caseC++, return 1
 *   - otherwise                                           -> return 0
 */
boolean isUnreliableTip_strict (unsigned int edgeid, int cutLen)
{
    unsigned int arcRight_n, arcLeft_n;
    unsigned int bal_ed;
    unsigned int currentEd = edgeid;
    int length = 0;
    unsigned int mult = 0;
    ARC *arc, *activeArc = NULL, *tempArc;

    if (edgeid == 0)
    {
        return 0;
    }

    bal_ed = getTwinEdge (edgeid);

    if (bal_ed == edgeid)
    {
        return 0;
    }

    /* The starting edge must have no arc on its twin side. */
    arcCount (bal_ed, &arcLeft_n);

    if (arcLeft_n > 0)
    {
        return 0;
    }

    while (currentEd)
    {
        arcCount (bal_ed, &arcLeft_n);
        tempArc = arcCount (currentEd, &arcRight_n);

        if (arcLeft_n > 1 || arcRight_n > 1)
        {
            /* Branch reached: reject if nothing has been walked yet or the
               twin side has no arc, otherwise stop and judge the last arc. */
            if (arcLeft_n == 0 || length == 0)
            {
                return 0;
            }
            else
            {
                break;
            }
        }

        length += edge_array[currentEd].length;

        if (length >= cutLen)
        {
            return 0;
        }

        if (tempArc)
        {
            activeArc = tempArc;
            currentEd = activeArc->to_ed;
            bal_ed = getTwinEdge (currentEd);
        }
        else
        {
            currentEd = 0;
        }
    }

    if (currentEd == 0)
    {
        caseA++;
        return 1;
    }

    if (!activeArc)
    {
        /* Not expected after a completed step; report it and bail out
           instead of dereferencing a NULL arc below. */
        printf ("no activeArc while checking edge %u\n", edgeid);
        return 0;
    }

    if (activeArc->multiplicity == 1)
    {
        caseB++;
        return 1;
    }

    /* Largest multiplicity among the arcs of the branch edge's twin. */
    for (arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next)
    {
        if (arc->multiplicity > mult)
        {
            mult = arc->multiplicity;
        }
    }

    if (mult > activeArc->multiplicity)
    {
        caseC++;
        return 1;
    }

    return 0;
}
/* Example #4
 * 0
 */
/*
 * Check whether startingNode leads, through its passage markers, to exactly
 * one well-supported unique node, and whether that link is reciprocal.
 *
 * Returns false when startingNode has no arcs or no markers, when more than
 * one distinct node reaches MULTIPLICITY_CUTOFF (startingNode is then marked
 * non-unique), when no node reaches it or the only candidate is
 * startingNode or its twin, or when the reverse check from the candidate's
 * twin finds a well-supported node other than startingNode's twin (the
 * candidate is then marked non-unique).  Returns true otherwise.
 *
 * Side effects visible here: dbgCounter, multCounter and nullCounter are
 * incremented on the corresponding paths, and `path` is set to the marker
 * recorded for the chosen candidate.  The per-node status flag is used as a
 * temporary "already in the list" mark and is cleared again while each list
 * is drained.
 */
static boolean uniqueNodesConnect(Node * startingNode)
{
	Node *target = NULL;
	PassageMarkerI origin, walker;
	RBConnection *entry;
	RBConnection *hits = NULL;
	boolean ambiguous = false;

	if (arcCount(startingNode) == 0)
		return false;

	if (getMarker(startingNode) == NULL_IDX)
		return false;

	dbgCounter++;

	// Pass 1: tally the first unique node met after each marker of
	// startingNode
	for (origin = getMarker(startingNode); origin != NULL_IDX;
	     origin = getNextInNode(origin)) {
		if (getFinishOffset(origin) > 2 * getWordLength(graph))
			continue;

		// Skip ahead to the first marker sitting on a unique node
		walker = getNextInSequence(origin);
		while (walker != NULL_IDX
		       && !getUniqueness(getNode(walker)))
			walker = getNextInSequence(walker);

		if (walker == NULL_IDX)
			continue;

		if (getNodeStatus(getNode(walker))) {
			// Node already tallied: bump its existing entry
			if (getStartOffset(walker) >
			    2 * getWordLength(graph))
				continue;

			entry = hits;
			while (entry != NULL
			       && entry->node != getNode(walker))
				entry = entry->next;

			// A flagged node must always be present in the list
			if (entry == NULL)
				abort();

			entry->multiplicity++;
		} else {
			// First sighting: flag the node and prepend an entry
			if (getStartOffset(walker) >
			    2 * getWordLength(graph))
				continue;

			setSingleNodeStatus(getNode(walker), true);
			entry = allocateRBConnection();
			entry->node = getNode(walker);
			entry->multiplicity = 1;
			entry->marker = origin;
			entry->next = hits;
			hits = entry;
		}
	}

	// Drain the tally: clear the flags and pick the supported destination
	while (hits != NULL) {
		entry = hits;
		hits = entry->next;
		setSingleNodeStatus(entry->node, false);
		if (entry->multiplicity >= MULTIPLICITY_CUTOFF) {
			if (target == NULL) {
				target = entry->node;
				path = entry->marker;
			} else if (target != entry->node) {
				ambiguous = true;
			}
		}
		deallocateRBConnection(entry);
	}

	if (ambiguous) {
		multCounter++;
		setUniqueness(startingNode, false);
		return false;
	}

	if (target == NULL || target == startingNode
	    || target == getTwinNode(startingNode)) {
		nullCounter++;
		return false;
	}

	// Pass 2: reciprocity, same tally starting from the twin of the
	// destination
	for (origin = getMarker(getTwinNode(target));
	     origin != NULL_IDX; origin = getNextInNode(origin)) {
		if (getFinishOffset(origin) > 2 * getWordLength(graph))
			continue;

		walker = getNextInSequence(origin);
		while (walker != NULL_IDX
		       && !getUniqueness(getNode(walker)))
			walker = getNextInSequence(walker);

		if (walker == NULL_IDX)
			continue;

		if (getNodeStatus(getNode(walker))) {
			if (getStartOffset(walker) >
			    2 * getWordLength(graph))
				continue;

			entry = hits;
			while (entry != NULL
			       && entry->node != getNode(walker))
				entry = entry->next;

			if (entry == NULL)
				abort();

			entry->multiplicity++;
		} else {
			if (getStartOffset(walker) >
			    2 * getWordLength(graph))
				continue;

			// The marker field is not recorded in this pass
			setSingleNodeStatus(getNode(walker), true);
			entry = allocateRBConnection();
			entry->node = getNode(walker);
			entry->multiplicity = 1;
			entry->next = hits;
			hits = entry;
		}
	}

	// Any supported node other than the twin of startingNode breaks
	// reciprocity
	while (hits != NULL) {
		entry = hits;
		hits = entry->next;
		setSingleNodeStatus(entry->node, false);
		if (entry->multiplicity >= MULTIPLICITY_CUTOFF
		    && entry->node != getTwinNode(startingNode))
			ambiguous = true;
		deallocateRBConnection(entry);
	}

	if (ambiguous) {
		multCounter++;
		setUniqueness(target, false);
		return false;
	}
	// Aligning long reads to each other:
	// TODO

	// Merge pairwise alignments and produce consensus
	// TODO

	return true;
}