Exemplo n.º 1
0
// DEBUG
// Debugging aid: compares the first two passage markers attached to a node.
// Returns silently when the node carries fewer than two markers, aborts the
// program when the two markers share a start offset or a finish offset, and
// otherwise prints the node ID followed by the coordinates and the textual
// form of each marker.
void checkNode(Node* node) {
	PassageMarkerI first = getMarker(node);

	if (first == NULL_IDX)
		return;

	PassageMarkerI second = getNextInNode(first);

	if (second == NULL_IDX)
		return;

	// Two markers with identical start or identical finish offsets are
	// treated as a fatal inconsistency.
	if (getStartOffset(first) == getStartOffset(second)
	    || getFinishOffset(first) == getFinishOffset(second))
		abort();

	printf(">>>> Node %li\n", (long) getNodeID(node));

	// Each line shows: start offset - marker length > distance from the
	// finish offset to the node end (finish offset).
	// NOTE(review): the strings returned by readPassageMarker are not
	// released here - confirm whether the caller owns them.
	printf("Marker1: %li - %li > %li (%li) \n", (long) getStartOffset(first), (long) getPassageMarkerLength(first), (long) (getNodeLength(node) - getFinishOffset(first)), (long) getFinishOffset(first));
	printf("%s\n", readPassageMarker(first));

	printf("Marker2: %li - %li > %li (%li) \n", (long) getStartOffset(second), (long) getPassageMarkerLength(second), (long) (getNodeLength(node) - getFinishOffset(second)), (long) getFinishOffset(second));
	printf("%s\n", readPassageMarker(second));

	// The former trailing overlap test had an empty body (its abort() was
	// commented out), so it had no effect and has been dropped.
}
Exemplo n.º 2
0
// Adjusts passage markers for the concatenation of `candidate` onto `node`
// (direction as suggested by the function name).
//  - Markers on `node` that do not go to `candidate` get their finish offset
//    increased by the length of `candidate`.
//  - Markers on `candidate` that do not come from `node` are extracted,
//    shifted by the length of `node` and transposed onto `node`; the others
//    are handed to reconnectPassageMarker.
// The `graph` parameter is not used by this function.
static void concatenateLongReads(Node * node, Node * candidate,
				 Graph * graph)
{
	PassageMarkerI current, following;

	// Pass 1: markers already sitting on node
	current = getMarker(node);
	while (current != NULL_IDX) {
		if (!goesToNode(current, candidate))
			incrementFinishOffset(current,
					      getNodeLength(candidate));
		current = getNextInNode(current);
	}

	// Pass 2: markers sitting on candidate
	current = getMarker(candidate);
	while (current != NULL_IDX) {
		// Fetch the successor first: the current marker is about to be
		// moved or reconnected.
		following = getNextInNode(current);

		if (comesFromNode(current, node)) {
			// The successor is passed by address, so the callee may
			// change which marker is visited next.
			reconnectPassageMarker(current, node, &following);
		} else {
			extractPassageMarker(current);
			incrementStartOffset(current, getNodeLength(node));
			transposePassageMarker(current, node);
			incrementFinishOffset(getTwinMarker(current),
					      getNodeLength(node));
		}

		current = following;
	}
}
Exemplo n.º 3
0
// Builds a transcript from the nodes returned by successive popNodeRecord()
// calls and attaches it to `locus`.
//  locus       - locus that receives the new transcript
//  nodesInList - value passed to newTranscript as the node count; also the
//                numerator of the ratio passed as its second argument
// For every pair of consecutive contigs the stored distance is the connection
// distance minus half of each node length (minus one more if either length is
// odd), clamped at zero.
// NOTE(review): nothing here checks that popNodeRecord yields at most
// nodesInList nodes, nor that a connection exists between consecutive
// contigs - confirm against the callers.
void produceTranscript(Locus * locus, IDnum nodesInList)
{
	Transcript *result = newTranscript(nodesInList, ((double) nodesInList) / getContigCount(locus));
	Node *current;
	Node *previous;
	IDnum count;

	for (count = 0; (current = popNodeRecord()); count++) {
		result->contigs[count] = current;

		// The first contig has no predecessor, hence no distance
		if (count == 0)
			continue;

		previous = result->contigs[count - 1];

		result->distances[count - 1] =
		    getConnectionDistance((getConnectionBetweenNodes(previous, getTwinNode(current))));
		result->distances[count - 1] -= getNodeLength(current) / 2;
		result->distances[count - 1] -= getNodeLength(previous) / 2;

		// Compensate for the integer halving above
		if (getNodeLength(current) % 2 > 0 || getNodeLength(previous) % 2 > 0)
			result->distances[count - 1] -= 1;

		if (result->distances[count - 1] < 0)
			result->distances[count - 1] = 0;
	}

	result->contigCount = count;
	addTranscript(locus, result);
}
Exemplo n.º 4
0
// Writes into `sequence` the node sequences visited by the markers that
// follow `path`, stopping at the first terminal marker.
// Returns false, leaving `sequence` untouched, when the summed node lengths
// exceed MAXREADLENGTH; returns true otherwise.
static boolean
extractSequence(PassageMarkerI path, TightString * sequence)
{
	PassageMarkerI step;
	Coordinate total = 0;
	Coordinate cursor = 0;

	// First pass: add up the node lengths
	step = getNextInSequence(path);
	while (!isTerminal(step)) {
		total += getNodeLength(getNode(step));
		step = getNextInSequence(step);
	}

	if (total > MAXREADLENGTH)
		return false;

	setTightStringLength(sequence, total);

	// Second pass: copy each node's sequence at the running write position
	step = getNextInSequence(path);
	while (!isTerminal(step)) {
		Node *current = getNode(step);

		appendNodeSequence(current, sequence, cursor);
		cursor += getNodeLength(current);
		step = getNextInSequence(step);
	}

	return true;
}
Exemplo n.º 5
0
// Shifts the local scaffold distances of all marked nodes after the length of
// `node` changed from `oldLength` to its current length.
// Every marked node other than `node` has half of the length difference
// subtracted from its distance. Entries that fall below the threshold and
// have neither a back nor a front reference are unmarked; the remaining ones
// have their status normalised to +1 or -1 according to its sign.
static void recenterLocalScaffold(Node * node, Coordinate oldLength)
{
	Coordinate shift = (getNodeLength(node) - oldLength) / 2;
	Coordinate threshold = getNodeLength(node) / 2 - BACKTRACK_CUTOFF;
	NodeList *entry, *following;
	MiniConnection *mini;
	Node *other;

	for (entry = markedNodes; entry != NULL; entry = following) {
		// Read the successor before the entry can be unmarked
		following = entry->next;
		other = entry->node;

		if (other == node) {
			setSingleNodeStatus(other, 1);
			continue;
		}

		// localScaffold is indexed by node ID offset by the node count
		mini = &localScaffold[getNodeID(other) + nodeCount(graph)];
		mini->distance -= shift;

		if (mini->distance < threshold
		    && mini->backReference == NULL
		    && mini->frontReference == NULL) {
			unmarkNode(other, mini);
			continue;
		}

		if (getNodeStatus(other) > 0)
			setSingleNodeStatus(other, 1);
		else if (getNodeStatus(other) < 0)
			setSingleNodeStatus(other, -1);
	}
}
Exemplo n.º 6
0
// Looks for a CYTOSINE immediately followed by an ADENINE in a window made of
// the last SPLICE_FUZZINESS nucleotides of the twin of nodeB followed by the
// first SPLICE_FUZZINESS + 2 nucleotides of the twin of nodeA.
// Returns true as soon as such a pair is seen, false otherwise.
// NOTE(review): no check is made that either node is long enough for the
// window - confirm that callers guarantee this.
static boolean acceptorSiteAtJunction(Node * nodeA, Node * nodeB)
{
	Node *twinNodeA = getTwinNode(nodeA);
	Node *twinNodeB = getTwinNode(nodeB);
	Coordinate lengthB = getNodeLength(twinNodeB);
	Coordinate pos;
	Nucleotide previous, current;
	int k;

	// Seed with the first nucleotide of the window
	current = getNucleotideInNode(twinNodeB, lengthB - SPLICE_FUZZINESS);

	// Tail of the twin of nodeB
	for (pos = lengthB - SPLICE_FUZZINESS + 1; pos < lengthB; pos++) {
		previous = current;
		current = getNucleotideInNode(twinNodeB, pos);
		if (previous == CYTOSINE && current == ADENINE)
			return true;
	}

	// Head of the twin of nodeA; the first iteration pairs the last
	// nucleotide read above with position 0.
	for (k = 0; k < SPLICE_FUZZINESS + 2; k++) {
		previous = current;
		current = getNucleotideInNode(twinNodeA, k);
		if (previous == CYTOSINE && current == ADENINE)
			return true;
	}

	return false;
}
Exemplo n.º 7
0
// Basic uniqueness test: a node qualifies when its length is at least
// LONG_NODE_CUTOFF and its read coverage per unit of length does not exceed
// 1.5 times the expected coverage.
boolean isUniqueBasic(Node * node)
{
	if (getNodeLength(node) >= LONG_NODE_CUTOFF
	    && !(readCoverage(node) / (double) getNodeLength(node) >
		 1.5 * expected_coverage))
		return true;

	return false;
}
Exemplo n.º 8
0
// Estimates how many paired connections are expected between node IDA and the
// destination of `connect` for library category `cat`.
//  IDA     - ID of the origin node
//  connect - connection whose destination and distance are used
//  counts  - per-category arrays indexed by node ID + nodeCount(graph)
//  cat     - library category
// Returns 0 when the insert length of the category is not positive, when
// either node has zero length, or when the estimate is not positive;
// otherwise the estimate truncated to IDnum.
static IDnum expectedNumberOfConnections(IDnum IDA, Connection * connect,
					 IDnum ** counts, Category cat)
{
	Node *A = getNodeInGraph(graph, IDA);
	Node *B = connect->destination;
	IDnum IDB = getNodeID(B);
	Coordinate mu = getInsertLength(graph, cat);
	double sigma = sqrt(getInsertLength_var(graph, cat));
	Coordinate lengthA, lengthB;
	Coordinate longLength, shortLength, D;
	double densityA, densityB, minDensity;
	double M, N, O, P;
	double left, middle, right;
	double result;

	if (mu <= 0)
		return 0;

	lengthA = getNodeLength(A);
	lengthB = getNodeLength(B);

	if (lengthA == 0 || lengthB == 0)
		return 0;

	longLength = lengthA < lengthB ? lengthB : lengthA;
	shortLength = lengthA < lengthB ? lengthA : lengthB;

	// Per-length count for each node; the smaller one scales the result
	densityA = counts[cat][IDA + nodeCount(graph)] / (double) lengthA;
	densityB = counts[cat][IDB + nodeCount(graph)] / (double) lengthB;
	minDensity = densityA > densityB ? densityB : densityA;

	// The connection distance minus half of both lengths
	D = getConnectionDistance(connect) - (longLength + shortLength) / 2;

	// Standardised break points relative to the insert length distribution
	M = (D - mu) / sigma;
	N = (D + shortLength - mu) / sigma;
	O = (D + longLength - mu) / sigma;
	P = (D + shortLength + longLength - mu) / sigma;

	left = ((norm(M) - norm(N)) - M * normInt(M, N)) * sigma;
	middle = shortLength * normInt(N, O);
	right = ((norm(O) - norm(P)) - P * normInt(O, P)) * (-sigma);

	result = (minDensity * (left + middle + right));

	return result > 0 ? (IDnum) result : 0;
}
Exemplo n.º 9
0
// Creates connection(s) between `node` and the node whose ID is the negation
// of readOccurence->nodeID, based on one read seen in both.
//  position, offset - coordinates of the read relative to `node`
//  length           - used only for the variance when a position is unknown
// A negative position on either side is handled as "unknown": that side adds
// nothing to the distance and adds nodeLength^2 / 16 to the variance.
// Nothing is created when the target is `node` itself or its twin.
static void projectFromSingleRead(Node * node,
				  ReadOccurence * readOccurence,
				  Coordinate position,
				  Coordinate offset, Coordinate length)
{
	Node *target = getNodeInGraph(graph, -readOccurence->nodeID);
	Coordinate distance = 0;
	double variance = 1;
	Coordinate gap;

	if (target == getTwinNode(node) || target == node)
		return;

	// Contribution of the origin node
	if (position >= 0)
		distance += position - offset - getNodeLength(node) / 2;
	else
		variance += getNodeLength(node) * getNodeLength(node) / 16;

	// Contribution of the target node
	if (readOccurence->position >= 0)
		distance +=
		    -readOccurence->position + readOccurence->offset +
		    getNodeLength(target) / 2;
	else
		variance +=
		    getNodeLength(target) * getNodeLength(target) / 16;

	// Both positions known: a single connection, oriented by the sign of
	// the distance
	if (position >= 0 && readOccurence->position >= 0) {
		if (distance > 0)
			createConnection(getNodeID(node), getNodeID(target), 1, 0,
					 distance, variance);
		else
			createConnection(-getNodeID(node), -getNodeID(target), 1,
					 0, -distance, variance);
		return;
	}

	// At least one position unknown: drop the read when the relative
	// order of the offsets and the distance between the node ends are
	// more than 10 apart in the wrong direction
	gap = distance - getNodeLength(node) / 2 - getNodeLength(target) / 2;
	if (offset < readOccurence->offset && gap < -10)
		return;
	if (offset > readOccurence->offset && gap > 10)
		return;

	variance += length * length / 16;
	createConnection(getNodeID(node), getNodeID(target), 1, 0,
			 distance, variance);
	createConnection(-getNodeID(node), -getNodeID(target), 1,
			 0, -distance, variance);
}
Exemplo n.º 10
0
// Decides whether a connection leaving node IDA is kept.
// Rejected outright: weight below 0.1, or fewer than
// UNRELIABLE_CONNECTION_CUTOFF paired plus direct observations.
// When the destination is no longer than LENGTHCUTOFF, or when no paired
// connection is expected across all categories, the connection is kept only
// if it has direct observations. Otherwise the paired count must reach the
// expected total multiplied by pairedThreshold.
static boolean testConnection(IDnum IDA, Connection * connect,
			      IDnum ** counts)
{
	IDnum expected = 0;
	Category lib;

	if (connect->weight < 0.1)
		return false;

	if (connect->paired_count + connect->direct_count <
	    UNRELIABLE_CONNECTION_CUTOFF)
		return false;

	if (getNodeLength(connect->destination) <= LENGTHCUTOFF)
		return connect->direct_count > 0;

	// Categories run from 0 to CATEGORIES inclusive
	lib = 0;
	while (lib <= CATEGORIES) {
		expected +=
		    expectedNumberOfConnections(IDA, connect, counts, lib);
		lib++;
	}

	if (expected != 0)
		return connect->paired_count >= expected * pairedThreshold;

	return connect->direct_count > 0;
}
Exemplo n.º 11
0
// Uniqueness test based on the virtual coverage of categories 0 and 1.
// Returns false for empty nodes and for nodes no longer than
// LONG_NODE_CUTOFF. For longer nodes it returns whether the computed score
// exceeds PROBABILITY_CUTOFF.
boolean isUniqueSolexa(Node * node)
{
	Coordinate nodeLength = getNodeLength(node);
	Coordinate nodeCoverage =
	    (getVirtualCoverage(node, 0) + getVirtualCoverage(node, 1));
	double nodeDensity, probability;

	if (nodeLength == 0)
		return false;

	// Short nodes are never considered unique. The alternative formula
	// that used to follow this return could never execute and has been
	// removed.
	if (nodeLength <= LONG_NODE_CUTOFF)
		return false;

	nodeDensity = nodeCoverage / (double) nodeLength;

	probability =
	    LN2 / 2 +
	    nodeLength / (2 * expected_coverage) *
	    (expected_coverage * expected_coverage -
	     nodeDensity * nodeDensity / 2);

	return probability > PROBABILITY_CUTOFF;
}
Exemplo n.º 12
0
// Populates the local scaffold around `node`.
// `node` itself gets an empty mini connection. Then, for every connection of
// `node` and of its twin, the local scaffold entry of the node reached is
// either readjusted (when that node already has a non-zero status) or reset,
// and integrateDerivativeDistances is called on the connection.
// Connections of the twin are recorded with a negated distance.
static void markInterestingNodes(Node * node)
{
	Coordinate threshold = getNodeLength(node) / 2 - BACKTRACK_CUTOFF;
	Connection *link;
	Node *reached;
	MiniConnection *mini;

	// The node's own entry
	setEmptyMiniConnection(node);

	// Connections of the node: the entry is that of the destination's twin
	link = getConnection(node);
	while (link != NULL) {
		reached = getTwinNode(getConnectionDestination(link));
		mini = &localScaffold[getNodeID(reached) + nodeCount(graph)];

		if (!getNodeStatus(reached)) {
			resetMiniConnection(reached, mini,
					    getConnectionDistance(link),
					    getConnectionVariance(link), link,
					    NULL, true);
		} else {
			readjustMiniConnection(reached, mini,
					       getConnectionDistance(link),
					       threshold,
					       getConnectionVariance(link), link,
					       NULL);
			mini->backReference = NULL;
		}

		integrateDerivativeDistances(link, threshold, true);
		link = getNextConnection(link);
	}

	// Connections of the twin: the entry is that of the destination itself
	link = getConnection(getTwinNode(node));
	while (link != NULL) {
		reached = getConnectionDestination(link);
		mini = &localScaffold[getNodeID(reached) + nodeCount(graph)];

		if (!getNodeStatus(reached))
			resetMiniConnection(reached, mini,
					    -getConnectionDistance(link),
					    getConnectionVariance(link), NULL,
					    link, -1);
		else
			readjustMiniConnection(reached, mini,
					       -getConnectionDistance(link),
					       threshold,
					       getConnectionVariance(link), NULL,
					       link);

		integrateDerivativeDistances(link, threshold, false);
		link = getNextConnection(link);
	}
}
Exemplo n.º 13
0
// Shifts short read marker positions for the concatenation of `source` onto
// `target`, then merges the read lists of both nodes and of both twins.
// Does nothing when read starts are not activated in `graph` or when either
// node is NULL. Markers whose position is -1 are left unchanged.
void concatenateReadStarts(Node * target, Node * source, Graph * graph)
{
	ShortReadMarker *reads, *entry;
	IDnum readCount, i;
	Coordinate shift, where;

	if (!readStartsAreActivated(graph) || target == NULL || source == NULL)
		return;

	// Reads of source move forward by the length of target
	reads = getNodeReads(source, graph);
	readCount = getNodeReadCount(source, graph);
	shift = getNodeLength(target);

	for (i = 0; i < readCount; i++) {
		entry = getShortReadMarkerAtIndex(reads, i);
		where = getShortReadMarkerPosition(entry);
		if (where == -1)
			continue;
		setShortReadMarkerPosition(entry, where + shift);
	}

	// Reads of the twin of target move forward by the length of source
	reads = getNodeReads(getTwinNode(target), graph);
	readCount = getNodeReadCount(getTwinNode(target), graph);
	shift = getNodeLength(source);

	for (i = 0; i < readCount; i++) {
		entry = getShortReadMarkerAtIndex(reads, i);
		where = getShortReadMarkerPosition(entry);
		if (where == -1)
			continue;
		setShortReadMarkerPosition(entry, where + shift);
	}

	// Finally merge the lists on both strands
	mergeNodeReads(target, source, graph);
	mergeNodeReads(getTwinNode(target), getTwinNode(source), graph);
}
Exemplo n.º 14
0
// For every read flagged in interestingReads, searches for a node that holds
// the read on one strand and its mate on the opposite strand, and records one
// observation per read in coOccurences[library].
//  coOccurences     - per-library output arrays, filled from index 0 upwards
//  interestingReads - flags indexed by read index
//  readNodes        - per-read occurrence arrays, indexed by read index + 1
//  readNodeCounts   - lengths of those arrays, indexed by read index + 1
//  readPairs        - index of the mate of each read
//  cats             - category of each read; the library is category / 2
// The walk relies on each read's occurrences being ordered by increasing
// node ID: the read's list is scanned forwards while the mate's list is
// scanned backwards, comparing a node ID with the negated mate node ID.
static void measureCoOccurences(Coordinate ** coOccurences, boolean * interestingReads, ReadOccurence ** readNodes, IDnum * readNodeCounts, IDnum * readPairs, Category * cats) {
	IDnum nextSlot[CATEGORIES + 1];
	IDnum read, mate;
	IDnum ownCount;
	IDnum ownIdx, mateIdx;
	ReadOccurence *ownList, *mateList;
	Category lib;

	for (lib = 0; lib < CATEGORIES + 1; lib++)
		nextSlot[lib] = 0;

	for (read = 0; read < sequenceCount(graph); read++) {
		// Skip reads that were filtered out beforehand
		if (!interestingReads[read])
			continue;

		lib = cats[read] / 2;
		mate = readPairs[read];

		ownList = readNodes[read + 1];
		ownCount = readNodeCounts[read + 1];
		ownIdx = 0;

		mateList = readNodes[mate + 1];
		mateIdx = readNodeCounts[mate + 1] - 1;

		while (ownIdx < ownCount && mateIdx >= 0) {
			ReadOccurence *own = &ownList[ownIdx];
			ReadOccurence *other = &mateList[mateIdx];

			if (own->nodeID < -other->nodeID) {
				ownIdx++;
			} else if (own->nodeID > -other->nodeID) {
				mateIdx--;
			} else if (own->position > 0 && other->position > 0) {
				// Same node, opposite strands, both positions
				// known: record the observation and move on
				// to the next read
				coOccurences[lib][nextSlot[lib]] =
				      getNodeLength(getNodeInGraph(graph, own->nodeID))
				      + getWordLength(graph) - 1
				      - (own->position - own->offset)
				      - (other->position - other->offset);
				nextSlot[lib]++;
				break;
			} else {
				// Same node but unusable positions
				ownIdx++;
				mateIdx--;
			}
		}
	}
}
Exemplo n.º 15
0
// Updates the connections of `node` and of its twin after the length of
// `node` changed from `oldLength` to its current length.
// Connections of `node` are shortened by half of the length difference; those
// that end up below the threshold are removed from the local scaffold and
// destroyed. Connections of the twin are lengthened by the same amount.
// Twin connections, when present, receive the same adjustment.
static void recenterNode(Node * node, Coordinate oldLength)
{
	IDnum nodeID = getNodeID(node);
	Coordinate shift = (getNodeLength(node) - oldLength) / 2;
	Coordinate threshold = getNodeLength(node) / 2 - BACKTRACK_CUTOFF;
	Connection *link, *following;
	MiniConnection *mini;
	Node *reached;

	link = getConnection(node);
	while (link != NULL) {
		// Fetch the successor first: the link may be destroyed below
		following = getNextConnection(link);
		incrementConnectionDistance(link, -shift);

		if (getConnectionDistance(link) < threshold) {
			reached = getConnectionDestination(link);

			// Entry of the destination's twin (negated ID)
			mini = &localScaffold[-getNodeID(reached)
					      + nodeCount(graph)];
			mini->frontReference = NULL;
			unmarkNode(getTwinNode(reached), mini);
			destroyConnection(link, nodeID);
		} else if (getTwinConnection(link) != NULL) {
			incrementConnectionDistance(getTwinConnection(link), -shift);
		}

		link = following;
	}

	link = getConnection(getTwinNode(node));
	while (link != NULL) {
		following = getNextConnection(link);
		incrementConnectionDistance(link, shift);

		if (getTwinConnection(link) != NULL)
			incrementConnectionDistance(getTwinConnection(link), shift);

		link = following;
	}
}
Exemplo n.º 16
0
// Given two connections, considers the twins of their destinations and, when
// neither is flagged unique, turns a graph arc between them into a
// connection if no such connection exists yet. Both arc directions are
// tried.
// A created connection uses half of both node lengths as distance and the
// inverse of the arc multiplicity as variance; its weight is then increased
// by the arc multiplicity.
static void computeLocalNodeToNodeMappingsFromConnections(Connection *
							  connect,
							  Connection *
							  connect2)
{
	Node *node1 = getTwinNode(getConnectionDestination(connect));
	Node *node2 = getTwinNode(getConnectionDestination(connect2));
	IDnum nodeID1 = getNodeID(node1);
	IDnum nodeID2 = getNodeID(node2);
	Coordinate distance =
	    getNodeLength(node1)/2 + getNodeLength(node2)/2;
	Arc *arc;

	if (getUniqueness(node1) || getUniqueness(node2))
		return;

	// Direction node1 -> node2
	arc = getArcBetweenNodes(node1, node2, graph);
	if (arc && !getConnectionBetweenNodes(node1, getTwinNode(node2))) {
		createConnection(nodeID1, -nodeID2, getMultiplicity(arc),
				 0, distance,
				 1 / (double) getMultiplicity(arc));
		incrementConnectionWeight(getConnectionBetweenNodes
					  (node1, getTwinNode(node2)),
					  getMultiplicity(arc));
	}

	// Direction node2 -> node1
	arc = getArcBetweenNodes(node2, node1, graph);
	if (arc && !getConnectionBetweenNodes(node2, getTwinNode(node1))) {
		createConnection(nodeID2, -nodeID1, getMultiplicity(arc),
				 0, distance,
				 1 / (double) getMultiplicity(arc));
		incrementConnectionWeight(getConnectionBetweenNodes
					  (node2, getTwinNode(node1)),
					  getMultiplicity(arc));
	}
}
Exemplo n.º 17
0
// Moves every passage marker of `source` onto `bypass`, then increases the
// finish offset of the marker's twin by the length of `bypass`.
static void admitGroupies(Node * source, Node * bypass)
{
	PassageMarkerI current = getMarker(source);
	PassageMarkerI following;

	while (current != NULL_IDX) {
		// Fetch the successor before the marker leaves source's list
		following = getNextInNode(current);

		extractPassageMarker(current);
		transposePassageMarker(current, bypass);
		incrementFinishOffset(getTwinMarker(current),
				      getNodeLength(bypass));

		current = following;
	}
}
Exemplo n.º 18
0
// Looks for a GUANINE immediately followed by a THYMINE in a window made of
// the last SPLICE_FUZZINESS nucleotides of nodeA followed by the first
// SPLICE_FUZZINESS + 2 nucleotides of nodeB.
// Returns true as soon as such a pair is seen, false otherwise.
// NOTE(review): no check is made that either node is long enough for the
// window - confirm that callers guarantee this.
static boolean donorSiteAtJunction(Node * nodeA, Node * nodeB)
{
	Coordinate lengthA = getNodeLength(nodeA);
	Coordinate pos;
	Nucleotide previous, current;
	int k;

	// Seed with the first nucleotide of the window
	current = getNucleotideInNode(nodeA, lengthA - SPLICE_FUZZINESS);

	// Tail of nodeA
	for (pos = lengthA - SPLICE_FUZZINESS + 1; pos < lengthA; pos++) {
		previous = current;
		current = getNucleotideInNode(nodeA, pos);
		if (previous == GUANINE && current == THYMINE)
			return true;
	}

	// Head of nodeB; the first iteration pairs the last nucleotide read
	// above with position 0.
	for (k = 0; k < SPLICE_FUZZINESS + 2; k++) {
		previous = current;
		current = getNucleotideInNode(nodeB, k);
		if (previous == GUANINE && current == THYMINE)
			return true;
	}

	return false;
}
Exemplo n.º 19
0
// Returns true when the string produced by expandNodeFragment for the whole
// node contains "AATAAA" or "ATTAAA" at any position.
// The expanded string is released before returning.
static boolean finishesWithPAS(Node * node)
{
	char *sequence = expandNodeFragment(node, 0, getNodeLength(node),
					    getWordLength(graph));
	boolean found = false;

	if (strstr(sequence, "AATAAA") != NULL
	    || strstr(sequence, "ATTAAA") != NULL)
		found = true;

	free(sequence);
	return found;
}
Exemplo n.º 20
0
// Creates a paired connection from `node` to the node identified by
// readOccurence->nodeID, starting from the library insert length.
//  position, offset - coordinates of the first read relative to `node`
//  insertLength     - initial value of the distance
//  insertVariance   - initial value of the variance
// A negative position on either side is handled as "unknown": that side adds
// nothing to the distance and adds nodeLength^2 / 16 to the variance.
// Nothing is created when the target is `node` or its twin, when the target
// is flagged unique and has a smaller ID than `node`, or when the distance
// minus half of both node lengths is below -6 * sqrt(insertVariance).
// NOTE(review): the variance is held in a Coordinate, so insertVariance is
// converted to that type on entry - confirm this is intended.
static void projectFromReadPair(Node * node, ReadOccurence * readOccurence,
				Coordinate position, Coordinate offset,
				Coordinate insertLength,
				double insertVariance)
{
	Coordinate distance = insertLength;
	Coordinate variance = insertVariance;
	Node *target = getNodeInGraph(graph, readOccurence->nodeID);

	if (target == getTwinNode(node) || target == node)
		return;

	if (getUniqueness(target) && getNodeID(target) < getNodeID(node))
		return;

	// Contribution of the origin node
	if (position >= 0)
		distance += position - offset - getNodeLength(node) / 2;
	else
		variance += getNodeLength(node) * getNodeLength(node) / 16;

	// Contribution of the target node
	if (readOccurence->position >= 0)
		distance +=
		    readOccurence->position - readOccurence->offset -
		    getNodeLength(target) / 2;
	else
		variance +=
		    getNodeLength(target) * getNodeLength(target) / 16;

	// Reject pairs whose nodes would overlap by more than six standard
	// deviations
	if (distance - getNodeLength(node)/2 - getNodeLength(target)/2 < -6 * sqrt(insertVariance))
		return;

	createConnection(getNodeID(node), getNodeID(target), 0, 1,
			 distance, variance);
}
Exemplo n.º 21
0
// Processes one arc leaving `origin` during the local search.
//  origin     - node the arc leaves from
//  arc        - arc being followed
//  originTime - time already assigned to `origin`
// The arrival time at the destination is originTime plus the length of
// `origin` divided by the arc multiplicity. Three cases follow:
//  - destination not timed yet (stored time is -1): it is timed, inserted
//    into the heap and linked back to `origin`;
//  - destination reached earlier than previously recorded: unless it has no
//    heap entry, its time and predecessor are updated and the paths through
//    the destination and the former predecessor are compared;
//  - otherwise the paths through the destination and `origin` are compared.
// Nothing happens when `origin` is already the recorded predecessor.
static void tourBusArc_local(Node * origin, Arc * arc, Time originTime)
{
	Node *destination = getDestination(arc);
	IDnum slot = getNodeID(destination) + nodeCount(graph);
	Node *formerPrevious = previous[slot];
	Time travel, arrival, recorded;

	if (formerPrevious == origin)
		return;

	travel =
	    ((Time) getNodeLength(origin)) / ((Time) getMultiplicity(arc));
	arrival = originTime + travel;
	recorded = times[slot];

	// First visit
	if (recorded == -1) {
		setNodeTime(destination, arrival);
		dheapNodes[slot] =
		    insertNodeIntoDHeap(dheap, arrival, destination);
		previous[slot] = origin;
		return;
	}

	// Faster route than the one recorded
	if (recorded > arrival) {
		if (dheapNodes[slot] == NULL)
			return;

		setNodeTime(destination, arrival);
		replaceKeyInDHeap(dheap, dheapNodes[slot], arrival);
		previous[slot] = origin;

		comparePaths_local(destination, formerPrevious);
		return;
	}

	// Recorded route is at least as fast
	comparePaths_local(destination, origin);
}
Exemplo n.º 22
0
// Uniqueness test based on the total coverage of the node.
// Returns false for nodes no longer than LONG_NODE_CUTOFF. For longer nodes
// it returns whether the computed score exceeds PROBABILITY_CUTOFF.
boolean isUniqueSolexa(Node * node)
{
	Coordinate nodeLength = getNodeLength(node);
	Coordinate nodeCoverage = getTotalCoverage(node);
	double density, score;

	if (nodeLength <= LONG_NODE_CUTOFF)
		return false;

	density = nodeCoverage / (double) nodeLength;

	score =
	    LN2 / 2 +
	    nodeLength / (2 * expected_coverage) *
	    (expected_coverage * expected_coverage -
	     density * density / 2);

	return score > PROBABILITY_CUTOFF;
}
Exemplo n.º 23
0
// Updates the passage markers of `bypass` with respect to `nextNode`.
//  - A marker whose immediate successor lies on `nextNode` has that successor
//    disconnected and destroyed.
//  - When `nextNode` is flagged unique and the marker goes to it indirectly,
//    every successor up to and including the one on `nextNode` is
//    disconnected and destroyed.
//  - Any other marker has its finish offset increased by the length of
//    `nextNode`.
static void updateMembers(Node * bypass, Node * nextNode)
{
	PassageMarkerI marker, next, tmp;
	Coordinate nextLength = getNodeLength(nextNode);

	for (marker = getMarker(bypass); marker != NULL_IDX; marker = tmp) {
		// Fetch the successor in the node list before touching the marker
		tmp = getNextInNode(marker);

		if (!isTerminal(marker)
		    && getNode(getNextInSequence(marker)) == nextNode) {
			// Marker steps right into target
			next = getNextInSequence(marker);
			disconnectNextPassageMarker(marker, graph);
			destroyPassageMarker(next);
		} else if (getUniqueness(nextNode)
			   && goesToNode(marker, nextNode)) {
			// Marker goes indirectly to target: drop the
			// intermediate steps first
			while (getNode(getNextInSequence(marker)) !=
			       nextNode) {
				next = getNextInSequence(marker);
				disconnectNextPassageMarker(marker, graph);
				destroyPassageMarker(next);
			}

			// Then drop the step on the target itself
			next = getNextInSequence(marker);
			disconnectNextPassageMarker(marker, graph);
			destroyPassageMarker(next);
		} else {
			// Marker goes somewhere else or nowhere. The former
			// code split this into two branches that performed
			// the same increment, one of them after an unused
			// lookup of the next marker.
			incrementFinishOffset(marker, nextLength);
		}
	}
}
Exemplo n.º 24
0
// Increases the position of every short read marker on the twin of `target`
// by the length of `source`. Markers whose position is -1 are left
// unchanged. Does nothing when read starts are not activated in the graph.
static void adjustShortReads(Node * target, Node * source)
{
	ShortReadMarker *reads, *entry;
	IDnum readCount, i;
	Coordinate shift, where;

	if (!readStartsAreActivated(graph))
		return;

	reads = getNodeReads(getTwinNode(target), graph);
	readCount = getNodeReadCount(getTwinNode(target), graph);
	shift = getNodeLength(source);

	i = 0;
	while (i < readCount) {
		entry = getShortReadMarkerAtIndex(reads, i);
		where = getShortReadMarkerPosition(entry);
		if (where != -1)
			setShortReadMarkerPosition(entry, where + shift);
		i++;
	}
}
Exemplo n.º 25
0
// Estimates how many paired connections are expected between node IDA and the
// destination of `connect` for library category `cat`, scaled by the count
// recorded for the longer of the two nodes divided by its length.
//  IDA     - ID of the origin node
//  connect - connection whose destination and distance are used
//  counts  - per-category arrays indexed by node ID + nodeCount(graph)
//  cat     - library category
// Returns 0 when the insert length of the category is not positive or when
// the estimate is not positive; otherwise the estimate truncated to IDnum.
static IDnum expectedNumberOfConnections(IDnum IDA, Connection * connect,
					 IDnum ** counts, Category cat)
{
	Node *A = getNodeInGraph(graph, IDA);
	Node *B = connect->destination;
	Coordinate mu = getInsertLength(graph, cat);
	double sigma = sqrt(getInsertLength_var(graph, cat));
	Coordinate lengthA, lengthB;
	Coordinate longLength, shortLength, D;
	IDnum longCount;
	double M, N, O, P;
	double left, middle, right;
	double result;

	if (mu <= 0)
		return 0;

	lengthA = getNodeLength(A);
	lengthB = getNodeLength(B);

	// Pick the longer node and the count recorded for it
	if (lengthA < lengthB) {
		longLength = lengthB;
		shortLength = lengthA;
		longCount = counts[cat][getNodeID(B) + nodeCount(graph)];
	} else {
		longLength = lengthA;
		shortLength = lengthB;
		longCount = counts[cat][IDA + nodeCount(graph)];
	}

	// The connection distance minus half of both lengths
	D = connect->distance - (longLength + shortLength) / 2;

	// Standardised break points relative to the insert length distribution
	M = (D - mu) / sigma;
	N = (D + shortLength - mu) / sigma;
	O = (D + longLength - mu) / sigma;
	P = (D + shortLength + longLength - mu) / sigma;

	left = ((norm(M) - norm(N)) - M * normInt(M, N)) * sigma;
	middle = shortLength * normInt(N, O);
	right = ((norm(O) - norm(P)) - P * normInt(O, P)) * (-sigma);

	result = (longCount * (left + middle + right)) / longLength;

	return result > 0 ? (IDnum) result : 0;
}
Exemplo n.º 26
0
// Handles one read pair linking `node` to the node identified by
// readOccurence->nodeID. Two modes, selected by `weight`:
//  - weight set: no connection is created. If both positions are positive
//    and a connection between the nodes exists, its weight is increased by
//    K * exp(-(distance - insertLength)^2 / (2 * insertVariance)), where
//    distance is the connection distance corrected by both read coordinates.
//  - weight clear: a paired connection is created, starting from the insert
//    length, unless the resulting distance minus half of both node lengths
//    is below -6 * sqrt(insertVariance).
// In both modes nothing happens when the occurrence has position and offset
// both equal to -1, when the target is `node` or its twin, or when the
// target is flagged unique and has a smaller ID than `node`.
// NOTE(review): the variance is held in a Coordinate, so insertVariance is
// converted to that type on entry - confirm this is intended.
static void projectFromReadPair(Node * node, ReadOccurence * readOccurence,
				Coordinate position, Coordinate offset,
				Coordinate insertLength,
				double insertVariance, boolean weight)
{
	Coordinate distance = insertLength;
	Coordinate variance = insertVariance;
	Node *target = getNodeInGraph(graph, readOccurence->nodeID);
	Connection *connect;
	double score;

	// Occurrences carrying no information
	if (readOccurence->position == -1 && readOccurence->offset == -1)
		return;

	if (target == getTwinNode(node) || target == node)
		return;

	if (getUniqueness(target) && getNodeID(target) < getNodeID(node))
		return;

	if (weight) {
		if (position <= 0 || readOccurence->position <= 0)
			return;

		connect = getConnectionBetweenNodes(node, target);
		if (!connect)
			return;

		distance = getConnectionDistance(connect);
		distance -= position - offset - getNodeLength(node) / 2;
		distance -=
		    readOccurence->position -
		    readOccurence->offset -
		    getNodeLength(target) / 2;

		score =
		    K *
		    exp((insertLength - distance) * (distance -
						     insertLength)
			/ (2 * insertVariance));

		incrementConnectionWeight(connect, score);
		return;
	}

	// Contribution of the origin node; a negative position only widens
	// the variance
	if (position >= 0)
		distance += position - offset - getNodeLength(node) / 2;
	else
		variance += getNodeLength(node) * getNodeLength(node) / 16;

	// Contribution of the target node
	if (readOccurence->position >= 0)
		distance +=
		    readOccurence->position - readOccurence->offset -
		    getNodeLength(target) / 2;
	else
		variance +=
		    getNodeLength(target) * getNodeLength(target) / 16;

	if (distance - getNodeLength(node) / 2 -
	    getNodeLength(target) / 2 < -6 * sqrt(insertVariance))
		return;

	createConnection(getNodeID(node), getNodeID(target), 0, 1,
			 distance, variance);
}
Exemplo n.º 27
0
// Handles one read seen both in `node` and in the node whose ID is the
// negation of readOccurence->nodeID. Two modes, selected by `weight`:
//  - weight set: the weight of the existing connection between the nodes, or
//    failing that between their twins, is increased by 1; nothing is created.
//  - weight clear: one direct connection is created. Its orientation depends
//    on whether `offset` is smaller than the occurrence's offset.
// A negative position on either side adds half the node length to the
// distance and nodeLength^2 / 16 to the variance.
// In both modes nothing happens when the occurrence has position and offset
// both equal to -1, when either offset is negative, or when the target is
// `node` or its twin.
// The `length` parameter is not used by this function.
static void projectFromSingleRead(Node * node,
				  ReadOccurence * readOccurence,
				  Coordinate position,
				  Coordinate offset, Coordinate length,
				  boolean weight)
{
	Coordinate distance = 0;
	Connection *connect;
	Node *target = getNodeInGraph(graph, -readOccurence->nodeID);
	double variance = 1;

	// Occurrences carrying no information
	if (readOccurence->position == -1 && readOccurence->offset == -1)
		return;

	if (offset < 0 || readOccurence->offset < 0)
		return;

	if (target == getTwinNode(node) || target == node)
		return;

	if (weight) {
		connect = getConnectionBetweenNodes(node, target);
		if (!connect)
			connect = getConnectionBetweenNodes(getTwinNode(node), getTwinNode(target));
		if (connect)
			incrementConnectionWeight(connect, 1);
		return;
	}

	// Contribution of the origin node
	if (position >= 0) {
		distance += position - offset - getNodeLength(node) / 2;
	} else {
		variance += getNodeLength(node) * getNodeLength(node) / 16;
		distance += getNodeLength(node) / 2;
	}

	// Contribution of the target node
	if (readOccurence->position >= 0) {
		distance +=
		    -readOccurence->position + readOccurence->offset +
		    getNodeLength(target) / 2;
	} else {
		variance +=
		    getNodeLength(target) * getNodeLength(target) / 16;
		distance += getNodeLength(target) / 2;
	}

	// The distance is adjusted by one when the relevant node length is odd
	if (offset < readOccurence->offset) {
		if (getNodeLength(node) % 2)
			distance--;
		createConnection(getNodeID(node), getNodeID(target), 1, 0,
				 distance, variance);
		return;
	}

	if (getNodeLength(target) % 2)
		distance++;
	createConnection(-getNodeID(node), -getNodeID(target), 1,
			 0, -distance, variance);
}
Exemplo n.º 28
0
// Builds a sorted k-mer occurrence table from the node sequences stored in a
// pre-graph file.
//  preGraphFilename - path of the pre-graph file, read twice
//  accelerationBits - forwarded to newKmerOccurenceTable
//  graph            - used to obtain node lengths for reverse strand hits
//  double_strand    - when set, the reverse word is tracked too and the
//                     smaller of the two (per compareKmers) is recorded
//  nodeMasks        - masked intervals (nodeID, start, finish); may be NULL
//  nodeMaskCount    - number of entries in nodeMasks
// The first pass counts k-mers so that the table can be allocated; the
// second pass records every k-mer that does not fall inside a mask.
// Exits through exitErrorf when the file cannot be opened, is truncated, or
// has a header from which the word length cannot be read.
static KmerOccurenceTable *referenceGraphKmers(char *preGraphFilename,
					       short int accelerationBits, Graph * graph, boolean double_strand, NodeMask * nodeMasks, Coordinate nodeMaskCount)
{
	FILE *file = fopen(preGraphFilename, "r");
	const int maxline = MAXLINE;
	char line[MAXLINE];
	// getc() returns an int so that EOF can be told apart from every
	// valid character; holding it in a char breaks the EOF comparisons.
	int c;
	int wordLength;
	Coordinate lineLength, kmerCount;
	Kmer word;
	Kmer antiWord;
	KmerOccurenceTable *kmerTable;
	IDnum index;
	IDnum nodeID = 0;
	Nucleotide nucleotide;
	NodeMask * nodeMask = nodeMasks; 
	Coordinate nodeMaskIndex = 0;

	if (file == NULL)
		exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);

	// Count kmers
	velvetLog("Scanning pre-graph file %s for k-mers\n",
		  preGraphFilename);

	// First line: the word length is the third field
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
	if (sscanf(line, "%*i\t%*i\t%i\n", &wordLength) != 1)
		exitErrorf(EXIT_FAILURE, true, "PreGraph file header malformed");

	kmerTable = newKmerOccurenceTable(accelerationBits, wordLength);

	// Read nodes: each header line starting with 'N' is followed by one
	// sequence line, whose length is measured character by character
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
	kmerCount = 0;
	while (line[0] == 'N') {
		lineLength = 0;
		while ((c = getc(file)) != EOF && c != '\n')
			lineLength++;
		kmerCount += lineLength - wordLength + 1;
		if (fgets(line, maxline, file) == NULL)
			break;
	}

	velvetLog("%li kmers found\n", (long) kmerCount);

	// Masked positions are not recorded, so do not allocate room for them
	for(nodeMaskIndex = 0; nodeMaskIndex < nodeMaskCount; nodeMaskIndex++) {
		kmerCount -= nodeMasks[nodeMaskIndex].finish -
nodeMasks[nodeMaskIndex].start;
	}

	nodeMaskIndex = 0;

	fclose(file);

	// Create table
	allocateKmerOccurences(kmerCount, kmerTable);

	// Fill table
	file = fopen(preGraphFilename, "r");
	if (file == NULL)
		exitErrorf(EXIT_FAILURE, true, "Could not open %s", preGraphFilename);

	// Skip the header line
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");

	// Read nodes
	if (!fgets(line, maxline, file))
		exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
	while (line[0] == 'N') {
		nodeID++;

		// Fill in the initial word : 
		clearKmer(&word);
		clearKmer(&antiWord);

		for (index = 0; index < wordLength - 1; index++) {
			c = getc(file);
			if (c == 'A')
				nucleotide = ADENINE;
			else if (c == 'C')
				nucleotide = CYTOSINE;
			else if (c == 'G')
				nucleotide = GUANINE;
			else if (c == 'T')
				nucleotide = THYMINE;
			else if (c == '\n' || c == EOF)
				// The sequence ended before a full word was read
				exitErrorf(EXIT_FAILURE, true, "PreGraph file incomplete");
			else
				nucleotide = ADENINE;
				

			pushNucleotide(&word, nucleotide);
			if (double_strand) {
#ifdef COLOR
				reversePushNucleotide(&antiWord, nucleotide);
#else
				reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
			}
		}

		// Scan through node
		index = 0;
		while((c = getc(file)) != '\n' && c != EOF) {
			// Any character other than A, C, G or T counts as ADENINE
			if (c == 'A')
				nucleotide = ADENINE;
			else if (c == 'C')
				nucleotide = CYTOSINE;
			else if (c == 'G')
				nucleotide = GUANINE;
			else if (c == 'T')
				nucleotide = THYMINE;
			else
				nucleotide = ADENINE;

			pushNucleotide(&word, nucleotide);
			if (double_strand) {
#ifdef COLOR
				reversePushNucleotide(&antiWord, nucleotide);
#else
				reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
			}

			// Update mask if necessary: step to the next mask once
			// the current one lies entirely behind this position
			if (nodeMask) { 
				if (nodeMask->nodeID < nodeID || (nodeMask->nodeID == nodeID && index >= nodeMask->finish)) {
					if (++nodeMaskIndex == nodeMaskCount) 
						nodeMask = NULL;
					else 
						nodeMask++;
				}
			}

			// Check if not masked!
			if (nodeMask) { 
				if (nodeMask->nodeID == nodeID && index >= nodeMask->start && index < nodeMask->finish) {
					index++;
					continue;
				} 			
			}

			// Record whichever of the word and its reverse compares
			// lower; reverse hits use the negated node ID and a
			// position counted from the other end of the node
			if (!double_strand || compareKmers(&word, &antiWord) <= 0)
				recordKmerOccurence(&word, nodeID, index, kmerTable);
			else
				recordKmerOccurence(&antiWord, -nodeID, getNodeLength(getNodeInGraph(graph, nodeID)) - 1 - index, kmerTable);

			index++;
		}

		if (fgets(line, maxline, file) == NULL)
			break;
	}

	fclose(file);

	// Sort table
	sortKmerOccurenceTable(kmerTable);

	return kmerTable;
}
Exemplo n.º 29
0
/*
 * Threads one sequence through the graph, one k-mer at a time
 * (k = getWordLength(graph)).
 *
 * For every k-mer of tString a (node, coord) pair is looked up by one of
 * three routes, tried in this order:
 *   1. category == REFERENCE: findReferenceMapping() on (seqID, index);
 *   2. the current annotation covers this position and refers to a
 *      sequence ID within [-refCount, refCount]: findReferenceMapping()
 *      on the annotated reference coordinate;
 *   3. otherwise: findKmerInKmerOccurenceTable() on word or antiWord.
 * If a lookup fails after at least one k-mer was already placed
 * (previousNode != NULL) the threading stops.
 *
 * For every placed k-mer the graph is updated:
 *   - if it directly continues the previous k-mer (same node, coord one
 *     further), the current passage marker is extended or the node's
 *     virtual coverage is incremented;
 *   - otherwise a new passage marker / read start / coverage increment is
 *     recorded and createArc(previousNode, node, graph) is called.
 * Passage markers are used when category / 2 >= CATEGORIES, coverage
 * counters otherwise.
 *
 * Parameters:
 *   tString               sequence to thread; ignored if shorter than k.
 *   kmerTable             table searched by route 3.
 *   graph                 graph being filled in.
 *   seqID                 ID stored in passage markers and read starts;
 *                         also the lookup key of route 1.
 *   category              selects the route and the marker/coverage path.
 *   readTracking          if true, the coverage path also records read
 *                         starts (at most one addReadStart per node).
 *   double_strand         if true, the smaller of word/antiWord according
 *                         to compareKmers() is the one looked up.
 *   referenceMappings,
 *   referenceMappingCount passed to findReferenceMapping(); a NULL
 *                         referenceMappings is tolerated by route 1 only.
 *   refCount              bound on annotation sequence IDs for route 2.
 *   annotations,
 *   annotationCount       annotations of this sequence, walked with
 *                         getNextAnnotation().
 *   second_in_pair        if double_strand is false and this is true,
 *                         antiWord is looked up instead of word.
 */
static void threadSequenceThroughGraph(TightString * tString,
				       KmerOccurenceTable * kmerTable,
				       Graph * graph,
				       IDnum seqID, Category category,
				       boolean readTracking,
				       boolean double_strand,
				       ReferenceMapping * referenceMappings,
				       Coordinate referenceMappingCount,
				       IDnum refCount,
				       Annotation * annotations,
				       IDnum annotationCount,
				       boolean second_in_pair)
{
	// Sliding k-mer and its counterpart built with reversePushNucleotide
	// (presumably the reverse complement -- confirm against kmer.c)
	Kmer word;
	Kmer antiWord;
	Coordinate readNucleotideIndex;
	Coordinate kmerIndex;
	KmerOccurence *kmerOccurence;
	int wordLength = getWordLength(graph);

	// State of the current and of the previously placed k-mer
	PassageMarkerI marker = NULL_IDX;
	PassageMarkerI previousMarker = NULL_IDX;
	Node *node = NULL;
	Node *previousNode = NULL;
	Coordinate coord = 0;
	Coordinate previousCoord = 0;
	Nucleotide nucleotide;
	boolean reversed;

	// Reference mapping / annotation bookkeeping
	IDnum refID;
	Coordinate refCoord = 0;
	ReferenceMapping * refMap;
	Annotation * annotation = annotations;
	// index: number of k-mers processed so far (incremented once per loop turn)
	Coordinate index = 0;
	// uniqueIndex: k-mers processed outside of any annotation
	Coordinate uniqueIndex = 0;
	// annotIndex: k-mers processed inside the current annotation
	Coordinate annotIndex = 0;
	// annotCount: annotations already fully consumed
	IDnum annotCount = 0;
	// Nodes for which a read start was already recorded for this sequence
	SmallNodeList * nodePile = NULL;

	// Neglect any string shorter than WORDLENGTH :
	if (getLength(tString) < wordLength)
		return;

	clearKmer(&word);
	clearKmer(&antiWord);

	// Fill in the initial word: push the first wordLength - 1 nucleotides,
	// so that the first push of the main loop completes the first k-mer
	for (readNucleotideIndex = 0;
	     readNucleotideIndex < wordLength - 1; readNucleotideIndex++) {
		nucleotide = getNucleotide(readNucleotideIndex, tString);
		pushNucleotide(&word, nucleotide);
		// antiWord is only maintained when it can be looked up below
		if (double_strand || second_in_pair) {
#ifdef COLOR
			reversePushNucleotide(&antiWord, nucleotide);
#else
			reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
		}
	}

	// Go through sequence
	while (readNucleotideIndex < getLength(tString)) {
		nucleotide = getNucleotide(readNucleotideIndex++, tString);
		pushNucleotide(&word, nucleotide);
		if (double_strand || second_in_pair) {
#ifdef COLOR
			reversePushNucleotide(&antiWord, nucleotide);
#else
			reversePushNucleotide(&antiWord, 3 - nucleotide);
#endif
		}

		// Update annotation if necessary: once the current annotation
		// has been walked over its whole length, move to the next one
		if (annotCount < annotationCount && annotIndex == getAnnotationLength(annotation)) {
			annotation = getNextAnnotation(annotation);
			annotCount++;
			annotIndex = 0;
		}

		// Search for reference mapping
		if (category == REFERENCE) {
			if (referenceMappings) 
				refMap = findReferenceMapping(seqID, index, referenceMappings, referenceMappingCount);
			else 
				refMap = NULL;

			if (refMap) {
				node = getNodeInGraph(graph, refMap->nodeID);
				// The coordinate formula depends on the sign of the
				// mapped node ID
				if (refMap->nodeID > 0) {
					coord = refMap->nodeStart + (index - refMap->referenceStart);
				} else {
					coord = getNodeLength(node) - refMap->nodeStart - refMap->length + (index - refMap->referenceStart);
				}
			} else  {
				node = NULL;
				// Stop at the first gap once threading has started
				if (previousNode)
					break;
			}
		}
		// Search for reference-based mapping
		// NOTE(review): unlike the REFERENCE branch above, referenceMappings
		// is not checked against NULL here -- confirm callers never pass
		// annotations without reference mappings
		else if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation) && getAnnotSequenceID(annotation) <= refCount && getAnnotSequenceID(annotation) >= -refCount) {
			refID = getAnnotSequenceID(annotation);
			// A negative sequence ID walks the reference backwards
			if (refID > 0)
				refCoord = getStart(annotation) + annotIndex; 
			else
				refCoord = getStart(annotation) - annotIndex; 
			
			refMap = findReferenceMapping(refID, refCoord, referenceMappings, referenceMappingCount);
			// If success
			if (refMap) {
				if (refID > 0) {
					node = getNodeInGraph(graph, refMap->nodeID);
					if (refMap->nodeID > 0) {
						coord = refMap->nodeStart + (refCoord - refMap->referenceStart);
					} else {
						coord = getNodeLength(node) - refMap->nodeStart - refMap->length + (refCoord - refMap->referenceStart);
					}
				} else {
					// Negative refID: the node with the opposite ID is used
					node = getNodeInGraph(graph, -refMap->nodeID);
					if (refMap->nodeID > 0) {
						coord =  getNodeLength(node) - refMap->nodeStart - (refCoord - refMap->referenceStart) - 1;
					} else {
						coord = refMap->nodeStart + refMap->length - (refCoord - refMap->referenceStart) - 1;
					}
				}
			} else  {
				node = NULL;
				// Stop at the first gap once threading has started
				if (previousNode)
					break;
			}
		}		
		// Search in table
		else {
			reversed = false;
			if (double_strand) {
				// Look up whichever of word/antiWord compares smaller
				if (compareKmers(&word, &antiWord) <= 0) {
					kmerOccurence =
					findKmerInKmerOccurenceTable(&word,
								       kmerTable);
				} else { 
					kmerOccurence =
					       findKmerInKmerOccurenceTable(&antiWord,
						kmerTable);
					reversed = true;
				}
			} else {
				// Single strand: the second read of a pair is looked up
				// through antiWord, every other read through word
				if (!second_in_pair) {
					kmerOccurence =
					findKmerInKmerOccurenceTable(&word,
								       kmerTable);
				} else { 
					kmerOccurence =
					       findKmerInKmerOccurenceTable(&antiWord,
						kmerTable);
					reversed = true;
				}
			}
			
			if (kmerOccurence) {
				if (!reversed) {
					node = getNodeInGraph(graph, getKmerOccurenceNodeID(kmerOccurence));
					coord = getKmerOccurencePosition(kmerOccurence);
				} else {
					// Hit through antiWord: use the node with the opposite
					// ID and mirror the position within it
					node = getNodeInGraph(graph, -getKmerOccurenceNodeID(kmerOccurence));
					coord = getNodeLength(node) - getKmerOccurencePosition(kmerOccurence) - 1;
				}
			} else {
				node = NULL;
				// Stop at the first gap once threading has started
				if (previousNode) 
					break;
			}
		}

		// Increment positions: inside the current annotation advance
		// annotIndex, outside of it advance uniqueIndex
		if (annotCount < annotationCount && uniqueIndex >= getPosition(annotation)) 
			annotIndex++;
		else
			uniqueIndex++;

		// Fill in graph
		if (node)
		{
#ifdef OPENMP
			lockNode(node);
#endif
			// readNucleotideIndex was already advanced past the last
			// nucleotide of this k-mer, so this is the k-mer's start
			kmerIndex = readNucleotideIndex - wordLength;

			// Case 1: this k-mer directly follows the previous one
			// within the same node
			if (previousNode == node
			    && previousCoord == coord - 1) {
				if (category / 2 >= CATEGORIES) {
					// Stretch the current passage marker by one k-mer
					setPassageMarkerFinish(marker,
							       kmerIndex +
							       1);
					setFinishOffset(marker,
							getNodeLength(node)
							- coord - 1);
				} else {
#ifndef SINGLE_COV_CAT
					incrementVirtualCoverage(node, category / 2, 1);
					incrementOriginalVirtualCoverage(node, category / 2, 1);
#else
					incrementVirtualCoverage(node, 1);
#endif
				}
#ifdef OPENMP
				unLockNode(node);
#endif
			// Case 2: first k-mer, jump to another node, or jump within
			// the same node
			} else {
				if (category / 2 >= CATEGORIES) {
					// Start a new one-k-mer passage marker on this node and
					// chain it after the previous marker of this sequence
					marker =
					    newPassageMarker(seqID,
							     kmerIndex,
							     kmerIndex + 1,
							     coord,
							     getNodeLength
							     (node) -
							     coord - 1);
					transposePassageMarker(marker,
							       node);
					connectPassageMarkers
					    (previousMarker, marker,
					     graph);
					previousMarker = marker;
				} else {
					if (readTracking) {
						// First visit of this node by this sequence: record
						// a read start; later visits call
						// blurLastShortReadMarker instead
						if (!isNodeMemorized(node, nodePile)) {
							addReadStart(node,
								     seqID,
								     coord,
								     graph,
								     kmerIndex);
							memorizeNode(node, &nodePile);
						} else {
							blurLastShortReadMarker
							    (node, graph);
						}
					}

#ifndef SINGLE_COV_CAT
					incrementVirtualCoverage(node, category / 2, 1);
					incrementOriginalVirtualCoverage(node, category / 2, 1);
#else
					incrementVirtualCoverage(node, 1);
#endif
				}
				// NOTE(review): node is still held from lockNode() above and
				// this branch has no explicit unLockNode(); presumably
				// lockTwoNodes()/unLockTwoNodes() expect and release that
				// lock (also when previousNode is NULL) -- confirm
#ifdef OPENMP
				lockTwoNodes(node, previousNode);
#endif
				createArc(previousNode, node, graph);
#ifdef OPENMP
				unLockTwoNodes(node, previousNode);
#endif
			}

			previousNode = node;
			previousCoord = coord;
		}
		index++;
	}

	// Release the per-sequence list of nodes that received a read start
	if (readTracking && category / 2 < CATEGORIES)
		unMemorizeNodes(&nodePile);
}
Exemplo n.º 30
0
static void extractNodeASEvents(Node * node, Locus * locus)
{
	Node *nodeA, *nodeB, *nodeC;
	Event *event;

	// If linear or more than 2 outgoing arcs: ignore
	if (countActiveConnections(node) != 2)
		return;

	// Follow the two active arcs
	nodeA =
	    getTwinNode(getConnectionDestination
			(getActiveConnection(node)));
	nodeB =
	    getTwinNode(getConnectionDestination
			(getSecondActiveConnection(node)));

	// A should be the longer of the two
	if (getNodeLength(nodeA) < getNodeLength(nodeB)) {
		nodeC = nodeA;
		nodeA = nodeB;
		nodeB = nodeC;
		nodeC = NULL;
	}
	// If both very short, ignore:
	if (getNodeLength(nodeA) < 2 * getWordLength(graph) - 1)
		return;

	if (getNodeLength(nodeB) < 2 * getWordLength(graph) - 1) {
		if (countActiveConnections(nodeA) != 1
		    || countActiveConnections(nodeB) != 1
		    || getConnectionDestination(getActiveConnection(nodeA))
		    !=
		    getConnectionDestination(getActiveConnection(nodeB)))
			return;

		nodeC =
		    getTwinNode(getConnectionDestination
				(getActiveConnection(nodeA)));

		// Intron retention
		if (donorSiteAtJunction(node, nodeA)
		    && acceptorSiteAtJunction(nodeA, nodeC)) {
			event = allocateEvent();
			event->type = intron_retention;
			event->nodes[0] = node;
			event->nodes[1] = nodeA;
			event->nodes[2] = nodeB;
			event->nodes[3] = nodeC;
			event->next = locus->event;
			locus->event = event;
		}
		// Alternative 5' splice site
		else if (donorSiteAtJunction(node, nodeA)) {
			event = allocateEvent();
			event->type = alternative_5prime_splice;
			event->nodes[0] = node;
			event->nodes[1] = nodeA;
			event->nodes[2] = nodeB;
			event->nodes[3] = nodeC;
			event->next = locus->event;
			locus->event = event;
		}
		// Alternative 3' splice site
		else if (acceptorSiteAtJunction(nodeA, nodeC)) {
			event = allocateEvent();
			event->type = alternative_3prime_splice;
			event->nodes[0] = node;
			event->nodes[1] = nodeA;
			event->nodes[2] = nodeB;
			event->nodes[3] = nodeC;
			event->next = locus->event;
			locus->event = event;
		}
		// Skipped exon
		else {
			event = allocateEvent();
			event->type = skipped_exon;
			event->nodes[0] = node;
			event->nodes[1] = nodeA;
			event->nodes[2] = nodeB;
			event->nodes[3] = nodeC;
			event->next = locus->event;
			locus->event = event;
		}
	} else {
		// Alt. poly A:
		if (finishesWithPAS(node) && finishesWithPAS(nodeA)) {
			event = allocateEvent();
			event->type = alternative_polyA;
			event->nodes[0] = node;
			event->nodes[1] = nodeA;
			event->nodes[2] = nodeB;
			event->nodes[3] = NULL;
			event->next = locus->event;
			locus->event = event;
		}
		// Mutually exclusive exons
		if (countActiveConnections(nodeA) == 1
		    && countActiveConnections(nodeB) == 1
		    && getConnectionDestination(getActiveConnection(nodeA))
		    ==
		    getConnectionDestination(getActiveConnection(nodeB))) {
			event = allocateEvent();
			event->type = mutually_exclusive_exons;
			event->nodes[0] = node;
			event->nodes[1] = nodeA;
			event->nodes[2] = nodeB;
			event->nodes[3] =
			    getTwinNode(getConnectionDestination
					(getActiveConnection(nodeA)));
			event->next = locus->event;
			locus->event = event;
		}
	}
}