Beispiel #1
0
static void concatenateLongReads(Node * node, Node * candidate,
				 Graph * graph)
{
	PassageMarkerI marker, tmpMarker;

	// Passage marker management in node:
	for (marker = getMarker(node); marker != NULL_IDX;
	     marker = getNextInNode(marker)) {
		if (!goesToNode(marker, candidate))
			incrementFinishOffset(marker,
					      getNodeLength(candidate));
	}

	// Swapping new born passageMarkers from candidate to node
	for (marker = getMarker(candidate); marker != NULL_IDX;
	     marker = tmpMarker) {
		tmpMarker = getNextInNode(marker);

		if (!comesFromNode(marker, node)) {
			extractPassageMarker(marker);
			incrementStartOffset(marker,
					      getNodeLength(node));
			transposePassageMarker(marker, node);
			incrementFinishOffset(getTwinMarker(marker),
					      getNodeLength(node));
		} else {
			reconnectPassageMarker(marker, node, &tmpMarker);
		}
	}
}
Beispiel #2
0
static void computePartialReadToNodeMapping(IDnum nodeID,
					    ReadOccurence ** readNodes,
					    IDnum * readNodeCounts,
					    boolean * readMarker,
					    ReadSet * reads)
{
	ShortReadMarker *shortMarker;
	IDnum index, readIndex;
	ReadOccurence *readArray, *readOccurence;
	Node *node = getNodeInGraph(graph, nodeID);
	ShortReadMarker *nodeArray = getNodeReads(node, graph);
	IDnum nodeReadCount = getNodeReadCount(node, graph);
	PassageMarkerI marker;

	for (index = 0; index < nodeReadCount; index++) {
		shortMarker = getShortReadMarkerAtIndex(nodeArray, index);
		readIndex = getShortReadMarkerID(shortMarker);
		readArray = readNodes[readIndex];
		readOccurence = &readArray[readNodeCounts[readIndex]];
		readOccurence->nodeID = nodeID;
		readOccurence->position =
		    getShortReadMarkerPosition(shortMarker);
		readOccurence->offset =
		    getShortReadMarkerOffset(shortMarker);
		readNodeCounts[readIndex]++;
	}

	for (marker = getMarker(node); marker != NULL_IDX;
	     marker = getNextInNode(marker)) {
		readIndex = getPassageMarkerSequenceID(marker);
		if (readIndex <= 0 || reads->categories[readIndex - 1] == REFERENCE)
			continue;

		if (!readMarker[readIndex]) {
			readArray = readNodes[readIndex];
			readOccurence =
			    &readArray[readNodeCounts[readIndex]];
			readOccurence->nodeID = nodeID;
			readOccurence->position = getStartOffset(marker);
			readOccurence->offset =
			    getPassageMarkerStart(marker);
			readNodeCounts[readIndex]++;
			readMarker[readIndex] = true;
		} else {
			readArray = readNodes[readIndex];
			readOccurence =
			    &readArray[readNodeCounts[readIndex] - 1];
			readOccurence->position = -1;
			readOccurence->offset = -1;
		}
	}

	for (marker = getMarker(node); marker != NULL_IDX;
	     marker = getNextInNode(marker)) {
		readIndex = getPassageMarkerSequenceID(marker);
		if (readIndex > 0)
			readMarker[readIndex] = false;
	}
}
Beispiel #3
0
static IDnum *computeReadToNodeCounts()
{
	IDnum readIndex, nodeIndex;
	IDnum maxNodeIndex = 2 * nodeCount(graph) + 1;
	IDnum maxReadIndex = sequenceCount(graph) + 1;
	IDnum *readNodeCounts = callocOrExit(maxReadIndex, IDnum);
	boolean *readMarker = callocOrExit(maxReadIndex, boolean);
	ShortReadMarker *nodeArray, *shortMarker;
	PassageMarkerI marker;
	Node *node;
	IDnum nodeReadCount;

	//puts("Computing read to node mapping array sizes");

	for (nodeIndex = 0; nodeIndex < maxNodeIndex; nodeIndex++) {
		node = getNodeInGraph(graph, nodeIndex - nodeCount(graph));
		if (node == NULL)
			continue;

		// Short reads
		if (readStartsAreActivated(graph)) {
			nodeArray = getNodeReads(node, graph);
			nodeReadCount = getNodeReadCount(node, graph);
			for (readIndex = 0; readIndex < nodeReadCount; readIndex++) {
				shortMarker =
				    getShortReadMarkerAtIndex(nodeArray,
							      readIndex);
				readNodeCounts[getShortReadMarkerID
					       (shortMarker)]++;
			}
		}

		// Long reads
		for (marker = getMarker(node); marker != NULL_IDX;
		     marker = getNextInNode(marker)) {
			readIndex = getPassageMarkerSequenceID(marker);
			if (readIndex < 0)
				continue;

			if (readMarker[readIndex])
				continue;

			readNodeCounts[readIndex]++;
			readMarker[readIndex] = true;
		}

		// Clean up marker array
		for (marker = getMarker(node); marker != NULL_IDX;
		     marker = getNextInNode(marker)) {
			readIndex = getPassageMarkerSequenceID(marker);
			if (readIndex > 0)
				readMarker[readIndex] = false;
		}
	}

	free(readMarker);
	return readNodeCounts;
}
Beispiel #4
0
static void projectFromNode(IDnum nodeID,
			    ReadOccurence ** readNodes,
			    IDnum * readNodeCounts,
			    IDnum * readPairs, Category * cats,
			    boolean * dubious, Coordinate * lengths)
{
	IDnum index;
	ShortReadMarker *nodeArray, *shortMarker;
	PassageMarker *marker;
	Node *node;
	IDnum nodeReadCount;

	node = getNodeInGraph(graph, nodeID);

	if (node == NULL || !getUniqueness(node))
		return;

	nodeArray = getNodeReads(node, graph);
	nodeReadCount = getNodeReadCount(node, graph);
	for (index = 0; index < nodeReadCount; index++) {
		shortMarker = getShortReadMarkerAtIndex(nodeArray, index);
		if (dubious[getShortReadMarkerID(shortMarker) - 1])
			continue;
		projectFromShortRead(node, shortMarker, readPairs, cats,
				     readNodes, readNodeCounts, lengths);
	}

	for (marker = getMarker(node); marker != NULL;
	     marker = getNextInNode(marker)) {
		if (getPassageMarkerSequenceID(marker) > 0)
			projectFromLongRead(node, marker, readPairs, cats,
					    readNodes, readNodeCounts,
					    lengths);
	}
}
Beispiel #5
0
// DEBUG
void checkNode(Node* node) {
	PassageMarkerI marker1 = getMarker(node);

	if (marker1 == NULL_IDX)
		return;

	PassageMarkerI marker2 = getNextInNode(marker1);

	if (marker2 == NULL_IDX)
		return;

	if (getStartOffset(marker1) == getStartOffset(marker2))
		abort();
	if (getFinishOffset(marker1) == getFinishOffset(marker2))
		abort();
	printf(">>>> Node %li\n", (long) getNodeID(node));
	printf("Marker1: %li - %li > %li (%li) \n", (long) getStartOffset(marker1), (long) getPassageMarkerLength(marker1), (long) (getNodeLength(node) - getFinishOffset(marker1)), (long) getFinishOffset(marker1));
	printf("%s\n", readPassageMarker(marker1));
	printf("Marker2: %li - %li > %li (%li) \n", (long) getStartOffset(marker2), (long) getPassageMarkerLength(marker2), (long) (getNodeLength(node) - getFinishOffset(marker2)), (long) getFinishOffset(marker2));

	printf("%s\n", readPassageMarker(marker2));
	if (getStartOffset(marker1) < getNodeLength(node) - getFinishOffset(marker2) 
	    && getStartOffset(marker2) < getNodeLength(node) - getFinishOffset(marker1)) {
		//abort();
		;
	}
}
Beispiel #6
0
static void adjustLongReads(Node * target, Coordinate nodeLength)
{
	PassageMarkerI marker;

	for (marker = getMarker(target); marker != NULL_IDX;
	     marker = getNextInNode(marker))
		incrementFinishOffset(marker, nodeLength);
}
Beispiel #7
0
static void trimLongReadTips()
{
	IDnum index;
	Node *node;
	PassageMarkerI marker, next;

	velvetLog("Trimming read tips\n");

	for (index = 1; index <= nodeCount(graph); index++) {
		node = getNodeInGraph(graph, index);

		if (getUniqueness(node))
			continue;

		for (marker = getMarker(node); marker != NULL_IDX;
		     marker = next) {
			next = getNextInNode(marker);

			if (!isInitial(marker) && !isTerminal(marker))
				continue;

			if (isTerminal(marker))
				marker = getTwinMarker(marker);

			while (!getUniqueness(getNode(marker))) {
				if (next != NULL_IDX
				    && (marker == next
					|| marker == getTwinMarker(next)))
					next = getNextInNode(next);
				if (getNextInSequence(marker) != NULL_IDX) {
					marker = getNextInSequence(marker);
					destroyPassageMarker
					    (getPreviousInSequence
					     (marker));
				} else {
					destroyPassageMarker(marker);
					break;
				}
			}
		}
	}
}
Beispiel #8
0
static void admitGroupies(Node * source, Node * bypass)
{
	PassageMarkerI marker, tmpMarker;

	for (marker = getMarker(source); marker != NULL_IDX;
	     marker = tmpMarker) {
		tmpMarker = getNextInNode(marker);
		extractPassageMarker(marker);
		transposePassageMarker(marker, bypass);
		incrementFinishOffset(getTwinMarker(marker),
				      getNodeLength(bypass));
	}

}
Beispiel #9
0
static void updateMembers(Node * bypass, Node * nextNode)
{
	PassageMarkerI marker, next, tmp;
	Coordinate nextLength = getNodeLength(nextNode);

	// Update  marker + arc info
	for (marker = getMarker(bypass); marker != NULL_IDX; marker = tmp) {
		tmp = getNextInNode(marker);

		if (!isTerminal(marker)
		    && getNode(getNextInSequence(marker)) == nextNode) {
			// Marker steps right into target
			next = getNextInSequence(marker);
			disconnectNextPassageMarker(marker, graph);
			destroyPassageMarker(next);
		} else if (getUniqueness(nextNode)
			   && goesToNode(marker, nextNode)) {
			// Marker goes indirectly to target
			while (getNode(getNextInSequence(marker)) !=
			       nextNode) {
				next = getNextInSequence(marker);
				disconnectNextPassageMarker(marker, graph);
				destroyPassageMarker(next);
			}

			next = getNextInSequence(marker);
			disconnectNextPassageMarker(marker, graph);
			destroyPassageMarker(next);
		} else if (!isTerminal(marker)
			   && getFinishOffset(marker) == 0) {
			// Marker goes somewhere else than to target
			next = getNextInSequence(marker);
			incrementFinishOffset(marker, nextLength);
		} else {
			// Marker goes nowhere
			incrementFinishOffset(marker, nextLength);
		}
	}
}
Beispiel #10
0
static void reconnectPassageMarker(PassageMarkerI marker, Node * node,
				   PassageMarkerI * ptr)
{
	PassageMarkerI current;
	PassageMarkerI next = getNextInSequence(marker);
	PassageMarkerI tmpMarker;

	for (current = marker; getNode(current) != node;
	     current = getPreviousInSequence(current));

	setPreviousInSequence(current, next);
	concatenatePassageMarkers(current, marker);

	// Removing node and all intermediaries
	while (marker != current) {
		tmpMarker = getPreviousInSequence(marker);
		if (*ptr == marker || *ptr == getTwinMarker(marker))
			*ptr = getNextInNode(*ptr);
		setNextInSequence(marker, NULL_IDX);
		setPreviousInSequence(NULL_IDX, marker);
		destroyPassageMarker(marker);
		marker = tmpMarker;
	}
}
Beispiel #11
0
// Replaces two consecutive nodes into a single equivalent node
// The extra memory is freed
void concatenateNodes(Node * nodeA, Node * nodeB, Graph * graph)
{
	PassageMarkerI marker, tmpMarker;
	Node *twinA = getTwinNode(nodeA);
	Node *twinB = getTwinNode(nodeB);
	Arc *arc;
	Category cat;

	// Arc management:
	// Freeing useless arcs
	while (getArc(nodeA) != NULL)
		destroyArc(getArc(nodeA), graph);

	// Correct arcs
	for (arc = getArc(nodeB); arc != NULL; arc = getNextArc(arc)) {
		if (getDestination(arc) != twinB)
			createAnalogousArc(nodeA, getDestination(arc),
					   arc, graph);
		else
			createAnalogousArc(nodeA, twinA, arc, graph);
	}

	// Passage marker management in node A:
	for (marker = getMarker(nodeA); marker != NULL_IDX;
	     marker = getNextInNode(marker))
		if (isTerminal(marker))
			incrementFinishOffset(marker,
					      getNodeLength(nodeB));

	// Swapping new born passageMarkers from B to A
	for (marker = getMarker(nodeB); marker != NULL_IDX; marker = tmpMarker) {
		tmpMarker = getNextInNode(marker);

		if (isInitial(marker)
		    || getNode(getPreviousInSequence(marker)) != nodeA) {
			extractPassageMarker(marker);
			transposePassageMarker(marker, nodeA);
			incrementFinishOffset(getTwinMarker(marker),
					      getNodeLength(nodeA));
		} else
			disconnectNextPassageMarker(getPreviousInSequence
						    (marker), graph);
	}

	// Read starts
	concatenateReadStarts(nodeA, nodeB, graph);

	// Gaps
	appendNodeGaps(nodeA, nodeB, graph);

	// Descriptor management (node)
	appendDescriptors(nodeA, nodeB);

	// Update uniqueness:
	setUniqueness(nodeA, getUniqueness(nodeA) || getUniqueness(nodeB));

	// Update virtual coverage
	for (cat = 0; cat < CATEGORIES; cat++)
		incrementVirtualCoverage(nodeA, cat,
					 getVirtualCoverage(nodeB, cat));

	// Update original virtual coverage
	for (cat = 0; cat < CATEGORIES; cat++)
		incrementOriginalVirtualCoverage(nodeA, cat,
						 getOriginalVirtualCoverage
						 (nodeB, cat));

	// Freeing gobbled node
	destroyNode(nodeB, graph);
}
Beispiel #12
0
// Replaces two consecutive nodes into a single equivalent node
// The extra memory is freed
void concatenateStringOfNodes(Node * nodeA, Graph * graph)
{
	Node *twinA = getTwinNode(nodeA);
	Node * nodeB = nodeA;
	Node * twinB;
	Node *currentNode, *nextNode;
	Coordinate totalLength = 0;
	PassageMarkerI marker, tmpMarker;
	Arc *arc;
	Category cat;

	while (simpleArcCount(nodeB) == 1
	       &&
	       simpleArcCount(getTwinNode
			      (getDestination(getArc(nodeB)))) ==
	       1
	       && getDestination(getArc(nodeB)) != getTwinNode(nodeB)
	       && getDestination(getArc(nodeB)) != nodeA) {
		totalLength += getNodeLength(nodeB);
		nodeB = getDestination(getArc(nodeB));
	}
	twinB = getTwinNode(nodeB);
	totalLength += getNodeLength(nodeB);
	reallocateNodeDescriptor(nodeA, totalLength);

	currentNode = nodeA;
	while (currentNode != nodeB) {		
		currentNode = getDestination(getArc(currentNode));

		// Passage marker management in node A:
		for (marker = getMarker(nodeA); marker != NULL_IDX;
		     marker = getNextInNode(marker))
			if (getNode(getNextInSequence(marker)) != currentNode)
				incrementFinishOffset(marker,
						      getNodeLength(currentNode));

		// Swapping new born passageMarkers from B to A
		for (marker = getMarker(currentNode); marker != NULL_IDX; marker = tmpMarker) {
			tmpMarker = getNextInNode(marker);

			if (isInitial(marker)
			    || getNode(getPreviousInSequence(marker)) != nodeA) {
				extractPassageMarker(marker);
				transposePassageMarker(marker, nodeA);
				incrementFinishOffset(getTwinMarker(marker),
						      getNodeLength(nodeA));
			} else
				disconnectNextPassageMarker(getPreviousInSequence
							    (marker), graph);
		}

		// Read starts
		concatenateReadStarts(nodeA, currentNode, graph);

		// Gaps
		appendNodeGaps(nodeA, currentNode, graph);

		// Update uniqueness:
		setUniqueness(nodeA, getUniqueness(nodeA) || getUniqueness(currentNode));

		// Update virtual coverage
		for (cat = 0; cat < CATEGORIES; cat++)
			incrementVirtualCoverage(nodeA, cat,
						 getVirtualCoverage(currentNode, cat));

		// Update original virtual coverage
		for (cat = 0; cat < CATEGORIES; cat++)
			incrementOriginalVirtualCoverage(nodeA, cat,
							 getOriginalVirtualCoverage
							 (currentNode, cat));
		// Descriptor management (node)
		directlyAppendDescriptors(nodeA, currentNode, totalLength);
	}

	// Correct arcs
	for (arc = getArc(nodeB); arc != NULL; arc = getNextArc(arc)) {
		if (getDestination(arc) != twinB)
			createAnalogousArc(nodeA, getDestination(arc),
					   arc, graph);
		else
			createAnalogousArc(nodeA, twinA, arc, graph);
	}

	// Freeing gobbled nodes
	currentNode = getTwinNode(nodeB);
	while (currentNode != getTwinNode(nodeA)) {
		arc = getArc(currentNode);
		nextNode = getDestination(arc);
		destroyNode(currentNode, graph);
		currentNode = nextNode;
	}
}
Beispiel #13
0
static boolean uniqueNodesConnect(Node * startingNode)
{
	Node *destination = NULL;
	PassageMarkerI startMarker, currentMarker;
	RBConnection *newList;
	RBConnection *list = NULL;
	boolean multipleHits = false;

	if (arcCount(startingNode) == 0)
		return false;

	if (getMarker(startingNode) == NULL_IDX)
		return false;

	dbgCounter++;

	// Checking for multiple destinations
	for (startMarker = getMarker(startingNode); startMarker != NULL_IDX;
	     startMarker = getNextInNode(startMarker)) {
		if (getFinishOffset(startMarker) >
		    2 * getWordLength(graph))
			continue;

		for (currentMarker = getNextInSequence(startMarker);
		     currentMarker != NULL_IDX;
		     currentMarker = getNextInSequence(currentMarker)) {
			if (!getUniqueness(getNode(currentMarker))) {
				continue;
			} else if (getNodeStatus(getNode(currentMarker))) {
				if (getStartOffset(currentMarker) >
				    2 * getWordLength(graph))
					break;
				for (newList = list; newList != NULL;
				     newList = newList->next) {
					if (newList->node ==
					    getNode(currentMarker)) {
						newList->multiplicity++;
						break;
					}
				}
				if (newList == NULL)
					abort();
				break;
			} else {
				if (getStartOffset(currentMarker) >
				    2 * getWordLength(graph))
					break;
				setSingleNodeStatus(getNode(currentMarker),
						    true);
				newList = allocateRBConnection();
				newList->node = getNode(currentMarker);
				newList->multiplicity = 1;
				newList->marker = startMarker;
				newList->next = list;
				list = newList;
				break;
			}
		}
	}

	while (list != NULL) {
		newList = list;
		list = newList->next;
		setSingleNodeStatus(newList->node, false);
		if (newList->multiplicity >= MULTIPLICITY_CUTOFF) {
			if (destination == NULL) {
				destination = newList->node;
				path = newList->marker;
			} else if (destination != newList->node)
				multipleHits = true;
		}
		deallocateRBConnection(newList);
	}

	if (multipleHits) {
		multCounter++;
		setUniqueness(startingNode, false);
		return false;
	}

	if (destination == NULL || destination == startingNode
	    || destination == getTwinNode(startingNode)) {
		nullCounter++;
		return false;
	}
	// Check for reciprocity
	for (startMarker = getMarker(getTwinNode(destination));
	     startMarker != NULL_IDX;
	     startMarker = getNextInNode(startMarker)) {
		if (getFinishOffset(startMarker) >
		    2 * getWordLength(graph))
			continue;

		for (currentMarker = getNextInSequence(startMarker);
		     currentMarker != NULL_IDX;
		     currentMarker = getNextInSequence(currentMarker)) {
			if (!getUniqueness(getNode(currentMarker))) {
				continue;
			} else if (getNodeStatus(getNode(currentMarker))) {
				if (getStartOffset(currentMarker) >
				    2 * getWordLength(graph))
					break;
				for (newList = list; newList != NULL;
				     newList = newList->next) {
					if (newList->node ==
					    getNode(currentMarker)) {
						newList->multiplicity++;
						break;
					}
				}
				if (newList == NULL)
					abort();
				break;
			} else {
				if (getStartOffset(currentMarker) >
				    2 * getWordLength(graph))
					break;
				setSingleNodeStatus(getNode(currentMarker),
						    true);
				newList = allocateRBConnection();
				newList->node = getNode(currentMarker);
				newList->multiplicity = 1;
				newList->next = list;
				list = newList;
				break;
			}
		}
	}

	while (list != NULL) {
		newList = list;
		list = newList->next;
		setSingleNodeStatus(newList->node, false);
		if (newList->multiplicity >= MULTIPLICITY_CUTOFF
		    && newList->node != getTwinNode(startingNode))
			multipleHits = true;
		deallocateRBConnection(newList);
	}

	if (multipleHits) {
		multCounter++;
		setUniqueness(destination, false);
		return false;
	}
	// Aligning long reads to each other:
	// TODO 

	// Merge pairwise alignments and produce consensus
	// TODO

	return true;
}